/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1987-1995 by Symantec
 *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cg87.d, backend/cg87.d)
 */

module dmd.backend.cg87;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.barray;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.global;
import dmd.backend.oper;
import dmd.backend.ty;
import dmd.backend.evalu8 : el_toldoubled;

extern (C++):

nothrow:

// NOTE: this could be a TLS global which would allow this variable to be used in
// a multi-threaded version of the backend
__gshared Globals87 global87;

private:

int REGSIZE();

private extern (D) uint mask(uint m) { return 1 << m; }
void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op);


// Constants that the 8087 supports directly
// BUG: rewrite for 80 bit long doubles
enum PI    = 3.14159265358979323846;
enum LOG2  = 0.30102999566398119521;
enum LN2   = 0.6931471805599453094172321;
enum LOG2T = 3.32192809488736234787;
enum LOG2E = 1.4426950408889634074;   // 1/LN2

enum FWAIT = 0x9B;            // FWAIT opcode

/* Mark variable referenced by e as not a register candidate */
uint notreg(elem* e)
{
    return e.EV.Vsym.Sflags &= ~GTregcand;
}

/* Generate the appropriate ESC instruction */
ubyte ESC(uint MF, uint b)
{
    // 0xD8 is the first ESC opcode; MF occupies bits 1..2, b is added on top
    const uint opcode = 0xD8 + (MF << 1) + b;
    return cast(ubyte) opcode;
}

enum
{   // Values for MF
    MFfloat  = 0,
    MFlong   = 1,
    MFdouble = 2,
    MFword   = 3
}

/*********************************
 */

struct Dconst
{
    int round;
    Symbol *roundto0;
    Symbol *roundtonearest;
}

private __gshared Dconst oldd;

enum NDPP = 0;          // print out debugging info
bool NOSAHF() { return I64 || config.fpxmmregs; }     // can't use SAHF instruction

enum CW_roundto0 = 0xFBF;
enum CW_roundtonearest = 0x3BF;


/**********************************
 * When we need to temporarily save 8087 registers, we record information
 * about the save into an array of NDP structs.
 */

/* Wrapper around getlvalue() for operands of x87 instructions:
 * marks a referenced variable as not a register candidate, then adjusts
 * the prefix flags of the resulting addressing mode in *pcs.
 */
private void getlvalue87(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk)
{
    // the x87 instructions cannot read XMM registers
    switch (e.Eoper)
    {
        case OPvar:
        case OPrelconst:
            notreg(e);
            break;

        default:
            break;
    }

    getlvalue(cdb, pcs, e, keepmsk);

    if (ADDFWAIT())
        pcs.Iflags |= CFwait;

    if (I32)
        pcs.Iflags &= ~CFopsize;    // drop the operand size prefix
    else if (I64)
        pcs.Irex &= ~REX_W;         // drop REX.W
}

/****************************************
 * Store/load to ndp save location i
 */

private void ndp_fstp(ref CodeBuilder cdb, int i, tym_t ty)
{
    uint op;        // ESC opcode byte
    uint digit;     // reg field of the ModR/M byte

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            op = 0xD9;              // FSTP m32real i[BP]
            digit = 3;
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            op = 0xDD;              // FSTP m64real i[BP]
            digit = 3;
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            op = 0xDB;              // FSTP m80real i[BP]
            digit = 7;
            break;

        default:
            assert(0);
    }

    cdb.genc1(op,modregrm(2,digit,BPRM),FLndp,i);
}

private void ndp_fld(ref CodeBuilder cdb, int i, tym_t ty)
{
    uint op;        // ESC opcode byte
    uint digit;     // reg field of the ModR/M byte

    switch (tybasic(ty))
    {
        case TYfloat:
        case TYifloat:
        case TYcfloat:
            op = 0xD9;              // FLD m32real i[BP]
            digit = 0;
            break;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
        case TYcdouble:
            op = 0xDD;              // FLD m64real i[BP]
            digit = 0;
            break;

        case TYldouble:
        case TYildouble:
        case TYcldouble:
            op = 0xDB;              // FLD m80real i[BP]
            digit = 5;
            break;

        default:
            assert(0);
    }

    cdb.genc1(op,modregrm(2,digit,BPRM),FLndp,i);
}

/**************************
 * Return index of empty slot in global87.save[].
 * If every existing slot is occupied, a new empty slot is appended
 * and its index is returned.
 */

private int getemptyslot()
{
    int slot = 0;

    while (slot < global87.save.length)
    {
        if (global87.save[slot].e == null)
            return slot;
        ++slot;
    }

    // no free slot, grow the save area by one
    global87.save.push(NDP());
    return slot;
}

/*********************************
 * Pop 8087 stack.
 */

void pop87() { pop87(__LINE__, __FILE__); }

void pop87(int line, const(char)* file)
{
    if (NDPP)
        printf("pop87(%s(%d): stackused=%d)\n", file, line, global87.stackused);

    --global87.stackused;
    assert(global87.stackused >= 0);

    // slide every entry one position towards the top of stack
    foreach (k; 1 .. global87.stack.length)
        global87.stack[k - 1] = global87.stack[k];

    // end of stack is nothing
    global87.stack[$ - 1] = NDP();
}


/*******************************
 * Push 8087 stack. Generate and return any code
 * necessary to preserve anything that might run off the end of the stack.
218 */ 219 220 void push87(ref CodeBuilder cdb) { push87(cdb,__LINE__,__FILE__); } 221 222 void push87(ref CodeBuilder cdb, int line, const(char)* file) 223 { 224 // if we would lose the top register off of the stack 225 if (global87.stack[7].e != null) 226 { 227 int i = getemptyslot(); 228 global87.save[i] = global87.stack[7]; 229 cdb.genf2(0xD9,0xF6); // FDECSTP 230 genfwait(cdb); 231 ndp_fstp(cdb, i, global87.stack[7].e.Ety); // FSTP i[BP] 232 assert(global87.stackused == 8); 233 if (NDPP) printf("push87() : overflow\n"); 234 } 235 else 236 { 237 if (NDPP) printf("push87(%s(%d): %d)\n", file, line, global87.stackused); 238 global87.stackused++; 239 assert(global87.stackused <= 8); 240 } 241 // Shift the stack up 242 for (int i = 7; i > 0; i--) 243 global87.stack[i] = global87.stack[i - 1]; 244 global87.stack[0] = NDP(); 245 } 246 247 /***************************** 248 * Note elem e as being in ST(i) as being a value we want to keep. 249 */ 250 251 void note87(elem *e, uint offset, int i) 252 { 253 note87(e, offset, i, __LINE__); 254 } 255 256 void note87(elem *e, uint offset, int i, int linnum) 257 { 258 if (NDPP) 259 printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum); 260 261 static if (0) 262 { 263 if (global87.stack[i].e) 264 printf("global87.stack[%d].e = %p\n",i,global87.stack[i].e); 265 } 266 267 debug if (i >= global87.stackused) 268 { 269 printf("note87(e = %p.%d, i = %d, stackused = %d, line = %d)\n",e,offset,i,global87.stackused,linnum); 270 elem_print(e); 271 } 272 assert(i < global87.stackused); 273 274 while (e.Eoper == OPcomma) 275 e = e.EV.E2; 276 global87.stack[i].e = e; 277 global87.stack[i].offset = offset; 278 } 279 280 /**************************************************** 281 * Exchange two entries in 8087 stack. 
282 */ 283 284 void xchg87(int i, int j) 285 { 286 NDP save; 287 288 save = global87.stack[i]; 289 global87.stack[i] = global87.stack[j]; 290 global87.stack[j] = save; 291 } 292 293 /**************************** 294 * Make sure that elem e is in register ST(i). Reload it if necessary. 295 * Input: 296 * i 0..3 8087 register number 297 * flag 1 don't bother with FXCH 298 */ 299 300 private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag) 301 { 302 makesure87(cdb,e,offset,i,flag,__LINE__); 303 } 304 305 private void makesure87(ref CodeBuilder cdb,elem *e,uint offset,int i,uint flag,int linnum) 306 { 307 debug if (NDPP) printf("makesure87(e=%p, offset=%d, i=%d, flag=%d, line=%d)\n",e,offset,i,flag,linnum); 308 309 while (e.Eoper == OPcomma) 310 e = e.EV.E2; 311 assert(e && i < 4); 312 L1: 313 if (global87.stack[i].e != e || global87.stack[i].offset != offset) 314 { 315 debug if (global87.stack[i].e) 316 printf("global87.stack[%d].e = %p, .offset = %d\n",i,global87.stack[i].e,global87.stack[i].offset); 317 318 assert(global87.stack[i].e == null); 319 int j; 320 for (j = 0; 1; j++) 321 { 322 if (j >= global87.save.length && e.Eoper == OPcomma) 323 { 324 e = e.EV.E2; // try right side 325 goto L1; 326 } 327 328 debug if (j >= global87.save.length) 329 printf("e = %p, global87.save.length = %llu\n",e, cast(ulong) global87.save.length); 330 331 assert(j < global87.save.length); 332 //printf("\tglobal87.save[%d] = %p, .offset = %d\n", j, global87.save[j].e, global87.save[j].offset); 333 if (e == global87.save[j].e && offset == global87.save[j].offset) 334 break; 335 } 336 push87(cdb); 337 genfwait(cdb); 338 ndp_fld(cdb, j, e.Ety); // FLD j[BP] 339 if (!(flag & 1)) 340 { 341 while (i != 0) 342 { 343 cdb.genf2(0xD9,0xC8 + i); // FXCH ST(i) 344 i--; 345 } 346 } 347 global87.save[j] = NDP(); // back in 8087 348 } 349 //global87.stack[i].e = null; 350 } 351 352 /**************************** 353 * Save in memory any values in the 8087 that we want to keep. 
354 */ 355 356 void save87(ref CodeBuilder cdb) 357 { 358 bool any = false; 359 while (global87.stack[0].e && global87.stackused) 360 { 361 // Save it 362 int i = getemptyslot(); 363 if (NDPP) printf("saving %p in temporary global87.save[%d]\n",global87.stack[0].e,i); 364 global87.save[i] = global87.stack[0]; 365 366 genfwait(cdb); 367 ndp_fstp(cdb,i,global87.stack[0].e.Ety); // FSTP i[BP] 368 pop87(); 369 any = true; 370 } 371 if (any) // if any stores 372 genfwait(cdb); // wait for last one to finish 373 } 374 375 /****************************************** 376 * Save any noted values that would be destroyed by n pushes 377 */ 378 379 void save87regs(ref CodeBuilder cdb, uint n) 380 { 381 assert(n <= 7); 382 uint j = 8 - n; 383 if (global87.stackused > j) 384 { 385 for (uint k = 8; k > j; k--) 386 { 387 cdb.genf2(0xD9,0xF6); // FDECSTP 388 genfwait(cdb); 389 if (k <= global87.stackused) 390 { 391 int i = getemptyslot(); 392 ndp_fstp(cdb, i, global87.stack[k - 1].e.Ety); // FSTP i[BP] 393 global87.save[i] = global87.stack[k - 1]; 394 global87.stack[k - 1] = NDP(); 395 } 396 } 397 398 for (uint k = 8; k > j; k--) 399 { 400 if (k > global87.stackused) 401 { cdb.genf2(0xD9,0xF7); // FINCSTP 402 genfwait(cdb); 403 } 404 } 405 global87.stackused = j; 406 } 407 } 408 409 /***************************************************** 410 * Save/restore ST0 or ST01 411 */ 412 413 void gensaverestore87(regm_t regm, ref CodeBuilder cdbsave, ref CodeBuilder cdbrestore) 414 { 415 //printf("gensaverestore87(%s)\n", regm_str(regm)); 416 assert(regm == mST0 || regm == mST01); 417 418 int i = getemptyslot(); 419 global87.save[i].e = el_calloc(); // this blocks slot [i] for the life of this function 420 ndp_fstp(cdbsave, i, TYldouble); 421 422 CodeBuilder cdb2a; 423 cdb2a.ctor(); 424 ndp_fld(cdb2a, i, TYldouble); 425 426 if (regm == mST01) 427 { 428 int j = getemptyslot(); 429 global87.save[j].e = el_calloc(); 430 ndp_fstp(cdbsave, j, TYldouble); 431 ndp_fld(cdbrestore, j, TYldouble); 432 
} 433 434 cdbrestore.append(cdb2a); 435 } 436 437 /************************************* 438 * Find which, if any, slot on stack holds elem e. 439 */ 440 441 private int cse_get(elem *e, uint offset) 442 { 443 int i; 444 445 for (i = 0; 1; i++) 446 { 447 if (i == global87.stackused) 448 { 449 i = -1; 450 //printf("cse not found\n"); 451 //elem_print(e); 452 break; 453 } 454 if (global87.stack[i].e == e && 455 global87.stack[i].offset == offset) 456 { //printf("cse found %d\n",i); 457 //elem_print(e); 458 break; 459 } 460 } 461 return i; 462 } 463 464 /************************************* 465 * Reload common subexpression. 466 */ 467 468 void comsub87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 469 { 470 //printf("comsub87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 471 // Look on 8087 stack 472 int i = cse_get(e, 0); 473 474 if (tycomplex(e.Ety)) 475 { 476 uint sz = tysize(e.Ety); 477 int j = cse_get(e, sz / 2); 478 if (i >= 0 && j >= 0) 479 { 480 push87(cdb); 481 push87(cdb); 482 cdb.genf2(0xD9,0xC0 + i); // FLD ST(i) 483 cdb.genf2(0xD9,0xC0 + j + 1); // FLD ST(j + 1) 484 fixresult_complex87(cdb,e,mST01,pretregs); 485 } 486 else 487 // Reload 488 loaddata(cdb,e,pretregs); 489 } 490 else 491 { 492 if (i >= 0) 493 { 494 push87(cdb); 495 cdb.genf2(0xD9,0xC0 + i); // FLD ST(i) 496 if (*pretregs & XMMREGS) 497 fixresult87(cdb,e,mST0,pretregs); 498 else 499 fixresult(cdb,e,mST0,pretregs); 500 } 501 else 502 // Reload 503 loaddata(cdb,e,pretregs); 504 } 505 } 506 507 508 /******************************* 509 * Decide if we need to gen an FWAIT. 510 */ 511 512 void genfwait(ref CodeBuilder cdb) 513 { 514 if (ADDFWAIT()) 515 cdb.gen1(FWAIT); 516 } 517 518 519 /*************************** 520 * Put the 8087 flags into the CPU flags. 
521 */ 522 523 private void cg87_87topsw(ref CodeBuilder cdb) 524 { 525 /* Note that SAHF is not available on some early I64 processors 526 * and will cause a seg fault 527 */ 528 assert(!NOSAHF); 529 getregs(cdb,mAX); 530 if (config.target_cpu >= TARGET_80286) 531 cdb.genf2(0xDF,0xE0); // FSTSW AX 532 else 533 { 534 cdb.genfltreg(0xD8+5,7,0); // FSTSW floatreg[BP] 535 genfwait(cdb); // FWAIT 536 cdb.genfltreg(0x8A,4,1); // MOV AH,floatreg+1[BP] 537 } 538 cdb.gen1(0x9E); // SAHF 539 code_orflag(cdb.last(),CFpsw); 540 } 541 542 /***************************************** 543 * Jump to ctarget if condition code C2 is set. 544 */ 545 546 private void genjmpifC2(ref CodeBuilder cdb, code *ctarget) 547 { 548 if (NOSAHF) 549 { 550 getregs(cdb,mAX); 551 cdb.genf2(0xDF,0xE0); // FSTSW AX 552 cdb.genc2(0xF6,modregrm(3,0,4),4); // TEST AH,4 553 genjmp(cdb, JNE, FLcode, cast(block *)ctarget); // JNE ctarget 554 } 555 else 556 { 557 cg87_87topsw(cdb); 558 genjmp(cdb, JP, FLcode, cast(block *)ctarget); // JP ctarget 559 } 560 } 561 562 /*************************** 563 * Set the PSW based on the state of ST0. 564 * Input: 565 * pop if stack should be popped after test 566 * Returns: 567 * start of code appended to c. 568 */ 569 570 private void genftst(ref CodeBuilder cdb,elem *e,int pop) 571 { 572 if (NOSAHF) 573 { 574 push87(cdb); 575 cdb.gen2(0xD9,0xEE); // FLDZ 576 cdb.gen2(0xDF,0xE9); // FUCOMIP ST1 577 pop87(); 578 if (pop) 579 { 580 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 581 pop87(); 582 } 583 } 584 else if (config.flags4 & CFG4fastfloat) // if fast floating point 585 { 586 cdb.genf2(0xD9,0xE4); // FTST 587 cg87_87topsw(cdb); // put 8087 flags in CPU flags 588 if (pop) 589 { 590 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 591 pop87(); 592 } 593 } 594 else if (config.target_cpu >= TARGET_80386) 595 { 596 // FUCOMP doesn't raise exceptions on QNANs, unlike FTST 597 push87(cdb); 598 cdb.gen2(0xD9,0xEE); // FLDZ 599 cdb.gen2(pop ? 
0xDA : 0xDD,0xE9); // FUCOMPP / FUCOMP 600 pop87(); 601 if (pop) 602 pop87(); 603 cg87_87topsw(cdb); // put 8087 flags in CPU flags 604 } 605 else 606 { 607 // Call library function which does not raise exceptions 608 regm_t regm = 0; 609 610 callclib(cdb,e,CLIB.ftest,®m,0); 611 if (pop) 612 { 613 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 614 pop87(); 615 } 616 } 617 } 618 619 /************************************* 620 * Determine if there is a special 8087 instruction to load 621 * constant e. 622 * Input: 623 * im 0 load real part 624 * 1 load imaginary part 625 * Returns: 626 * opcode if found 627 * 0 if not 628 */ 629 630 ubyte loadconst(elem *e, int im) 631 { 632 elem_debug(e); 633 assert(im == 0 || im == 1); 634 635 immutable float[7] fval = 636 [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2]; 637 immutable double[7] dval = 638 [0.0,1.0,PI,LOG2T,LOG2E,LOG2,LN2]; 639 640 static if (real.sizeof < 10) 641 { 642 import dmd.root.longdouble; 643 immutable targ_ldouble[7] ldval = 644 [ld_zero,ld_one,ld_pi,ld_log2t,ld_log2e,ld_log2,ld_ln2]; 645 } 646 else 647 { 648 enum M_PI_L = 0x1.921fb54442d1846ap+1L; // 3.14159 fldpi 649 enum M_LOG2T_L = 0x1.a934f0979a3715fcp+1L; // 3.32193 fldl2t 650 enum M_LOG2E_L = 0x1.71547652b82fe178p+0L; // 1.4427 fldl2e 651 enum M_LOG2_L = 0x1.34413509f79fef32p-2L; // 0.30103 fldlg2 652 enum M_LN2_L = 0x1.62e42fefa39ef358p-1L; // 0.693147 fldln2 653 immutable targ_ldouble[7] ldval = 654 [0.0,1.0,M_PI_L,M_LOG2T_L,M_LOG2E_L,M_LOG2_L,M_LN2_L]; 655 } 656 657 immutable ubyte[7 + 1] opcode = 658 /* FLDZ,FLD1,FLDPI,FLDL2T,FLDL2E,FLDLG2,FLDLN2,0 */ 659 [0xEE,0xE8,0xEB,0xE9,0xEA,0xEC,0xED,0]; 660 661 int i; 662 targ_float f; 663 targ_double d; 664 targ_ldouble ld; 665 int sz; 666 int zero; 667 void *p; 668 immutable ubyte[16] zeros; 669 670 if (im == 0) 671 { 672 switch (tybasic(e.Ety)) 673 { 674 case TYfloat: 675 case TYifloat: 676 case TYcfloat: 677 f = e.EV.Vfloat; 678 sz = 4; 679 p = &f; 680 break; 681 682 case TYdouble: 683 case TYdouble_alias: 684 case 
TYidouble: 685 case TYcdouble: 686 d = e.EV.Vdouble; 687 sz = 8; 688 p = &d; 689 break; 690 691 case TYldouble: 692 case TYildouble: 693 case TYcldouble: 694 ld = e.EV.Vldouble; 695 sz = 10; 696 p = &ld; 697 break; 698 699 default: 700 assert(0); 701 } 702 } 703 else 704 { 705 switch (tybasic(e.Ety)) 706 { 707 case TYcfloat: 708 f = e.EV.Vcfloat.im; 709 sz = 4; 710 p = &f; 711 break; 712 713 case TYcdouble: 714 d = e.EV.Vcdouble.im; 715 sz = 8; 716 p = &d; 717 break; 718 719 case TYcldouble: 720 ld = e.EV.Vcldouble.im; 721 sz = 10; 722 p = &ld; 723 break; 724 725 default: 726 assert(0); 727 } 728 } 729 730 // Note that for this purpose, -0 is not regarded as +0, 731 // since FLDZ loads a +0 732 assert(sz <= zeros.length); 733 zero = (memcmp(p, zeros.ptr, sz) == 0); 734 if (zero && config.target_cpu >= TARGET_PentiumPro) 735 return 0xEE; // FLDZ is the only one with 1 micro-op 736 737 // For some reason, these instructions take more clocks 738 if (config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium) 739 return 0; 740 741 if (zero) 742 return 0xEE; 743 744 for (i = 1; i < fval.length; i++) 745 { 746 switch (sz) 747 { 748 case 4: 749 if (fval[i] != f) 750 continue; 751 break; 752 case 8: 753 if (dval[i] != d) 754 continue; 755 break; 756 case 10: 757 if (ldval[i] != ld) 758 continue; 759 break; 760 default: 761 assert(0); 762 } 763 break; 764 } 765 return opcode[i]; 766 } 767 768 /****************************** 769 * Given the result of an expression is in retregs, 770 * generate necessary code to return result in *pretregs. 
771 */ 772 773 774 void fixresult87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs) 775 { 776 //printf("fixresult87(e = %p, retregs = x%x, *pretregs = x%x)\n", e,retregs,*pretregs); 777 //printf("fixresult87(e = %p, retregs = %s, *pretregs = %s)\n", e,regm_str(retregs),regm_str(*pretregs)); 778 assert(!*pretregs || retregs); 779 780 if ((*pretregs | retregs) & mST01) 781 { 782 fixresult_complex87(cdb, e, retregs, pretregs); 783 return; 784 } 785 786 tym_t tym = tybasic(e.Ety); 787 uint sz = _tysize[tym]; 788 //printf("tym = x%x, sz = %d\n", tym, sz); 789 790 /* if retregs needs to be transferred into the 8087 */ 791 if (*pretregs & mST0 && retregs & (mBP | ALLREGS)) 792 { 793 debug if (sz > DOUBLESIZE) 794 { 795 elem_print(e); 796 printf("retregs = %s\n", regm_str(retregs)); 797 } 798 assert(sz <= DOUBLESIZE); 799 if (!I16) 800 { 801 802 if (*pretregs & mPSW) 803 { // Set flags 804 regm_t r = retregs | mPSW; 805 fixresult(cdb,e,retregs,&r); 806 } 807 push87(cdb); 808 if (sz == REGSIZE || (I64 && sz == 4)) 809 { 810 const reg = findreg(retregs); 811 cdb.genfltreg(STO,reg,0); // MOV fltreg,reg 812 cdb.genfltreg(0xD9,0,0); // FLD float ptr fltreg 813 } 814 else 815 { 816 const msreg = findregmsw(retregs); 817 const lsreg = findreglsw(retregs); 818 cdb.genfltreg(STO,lsreg,0); // MOV fltreg,lsreg 819 cdb.genfltreg(STO,msreg,4); // MOV fltreg+4,msreg 820 cdb.genfltreg(0xDD,0,0); // FLD double ptr fltreg 821 } 822 } 823 else 824 { 825 regm_t regm = (sz == FLOATSIZE) ? FLOATREGS : DOUBLEREGS; 826 regm |= *pretregs & mPSW; 827 fixresult(cdb,e,retregs,®m); 828 regm = 0; // don't worry about result from CLIB.xxx 829 callclib(cdb,e, 830 ((sz == FLOATSIZE) ? CLIB.fltto87 : CLIB.dblto87), 831 ®m,0); 832 } 833 } 834 else if (*pretregs & (mBP | ALLREGS) && retregs & mST0) 835 { 836 assert(sz <= DOUBLESIZE); 837 uint mf = (sz == FLOATSIZE) ? 
MFfloat : MFdouble; 838 if (*pretregs & mPSW && !(retregs & mPSW)) 839 genftst(cdb,e,0); 840 // FSTP floatreg 841 pop87(); 842 cdb.genfltreg(ESC(mf,1),3,0); 843 genfwait(cdb); 844 reg_t reg; 845 allocreg(cdb,pretregs,®,(sz == FLOATSIZE) ? TYfloat : TYdouble); 846 if (sz == FLOATSIZE) 847 { 848 if (!I16) 849 cdb.genfltreg(LOD,reg,0); 850 else 851 { 852 cdb.genfltreg(LOD,reg,REGSIZE); 853 cdb.genfltreg(LOD,findreglsw(*pretregs),0); 854 } 855 } 856 else 857 { assert(sz == DOUBLESIZE); 858 if (I16) 859 { 860 cdb.genfltreg(LOD,AX,6); 861 cdb.genfltreg(LOD,BX,4); 862 cdb.genfltreg(LOD,CX,2); 863 cdb.genfltreg(LOD,DX,0); 864 } 865 else if (I32) 866 { 867 cdb.genfltreg(LOD,reg,REGSIZE); 868 cdb.genfltreg(LOD,findreglsw(*pretregs),0); 869 } 870 else // I64 871 { 872 cdb.genfltreg(LOD,reg,0); 873 code_orrex(cdb.last(), REX_W); 874 } 875 } 876 } 877 else if (*pretregs == 0 && retregs == mST0) 878 { 879 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 880 pop87(); 881 } 882 else 883 { 884 if (*pretregs & mPSW) 885 { 886 if (!(retregs & mPSW)) 887 { 888 genftst(cdb,e,!(*pretregs & (mST0 | XMMREGS))); // FTST 889 } 890 } 891 if (*pretregs & mST0 && retregs & XMMREGS) 892 { 893 assert(sz <= DOUBLESIZE); 894 uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble; 895 // MOVD floatreg,XMM? 896 const reg = findreg(retregs); 897 cdb.genxmmreg(xmmstore(tym),reg,0,tym); 898 push87(cdb); 899 cdb.genfltreg(ESC(mf,1),0,0); // FLD float/double ptr fltreg 900 } 901 else if (retregs & mST0 && *pretregs & XMMREGS) 902 { 903 assert(sz <= DOUBLESIZE); 904 uint mf = (sz == FLOATSIZE) ? MFfloat : MFdouble; 905 // FSTP floatreg 906 pop87(); 907 cdb.genfltreg(ESC(mf,1),3,0); 908 genfwait(cdb); 909 // MOVD XMM?,floatreg 910 reg_t reg; 911 allocreg(cdb,pretregs,®,(sz == FLOATSIZE) ? 
TYfloat : TYdouble); 912 cdb.genxmmreg(xmmload(tym),reg,0,tym); 913 } 914 else 915 assert(!(*pretregs & mST0) || (retregs & mST0)); 916 } 917 if (*pretregs & mST0) 918 note87(e,0,0); 919 } 920 921 /******************************** 922 * Generate in-line 8087 code for the following operators: 923 * add 924 * min 925 * mul 926 * div 927 * cmp 928 */ 929 930 // Reverse the order that the op is done in 931 __gshared const ubyte[9] oprev = [ cast(ubyte)-1,0,1,2,3,5,4,7,6 ]; 932 933 void orth87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 934 { 935 //printf("orth87(+e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 936 // we could be evaluating / for side effects only 937 assert(*pretregs != 0); 938 939 elem *e1 = e.EV.E1; 940 elem *e2 = e.EV.E2; 941 uint sz2 = tysize(e1.Ety); 942 if (tycomplex(e1.Ety)) 943 sz2 /= 2; 944 945 OPER eoper = e.Eoper; 946 if (eoper == OPmul && e2.Eoper == OPconst && el_toldoubled(e.EV.E2) == 2.0L) 947 { 948 // Perform "mul 2.0" as fadd ST(0), ST 949 regm_t retregs = mST0; 950 codelem(cdb,e1,&retregs,false); 951 cdb.genf2(0xDC, 0xC0); // fadd ST(0), ST; 952 fixresult87(cdb,e,mST0,pretregs); // result is in ST(0). 
953 freenode(e2); 954 return; 955 } 956 957 uint op; 958 if (OTrel(eoper)) 959 eoper = OPeqeq; 960 bool imaginary; 961 static uint X(OPER op, uint ty1, uint ty2) { return (op << 16) + ty1 * 256 + ty2; } 962 switch (X(eoper, tybasic(e1.Ety), tybasic(e2.Ety))) 963 { 964 case X(OPadd, TYfloat, TYfloat): 965 case X(OPadd, TYdouble, TYdouble): 966 case X(OPadd, TYdouble_alias, TYdouble_alias): 967 case X(OPadd, TYldouble, TYldouble): 968 case X(OPadd, TYldouble, TYdouble): 969 case X(OPadd, TYdouble, TYldouble): 970 case X(OPadd, TYifloat, TYifloat): 971 case X(OPadd, TYidouble, TYidouble): 972 case X(OPadd, TYildouble, TYildouble): 973 op = 0; // FADDP 974 break; 975 976 case X(OPmin, TYfloat, TYfloat): 977 case X(OPmin, TYdouble, TYdouble): 978 case X(OPmin, TYdouble_alias, TYdouble_alias): 979 case X(OPmin, TYldouble, TYldouble): 980 case X(OPmin, TYldouble, TYdouble): 981 case X(OPmin, TYdouble, TYldouble): 982 case X(OPmin, TYifloat, TYifloat): 983 case X(OPmin, TYidouble, TYidouble): 984 case X(OPmin, TYildouble, TYildouble): 985 op = 4; // FSUBP 986 break; 987 988 case X(OPmul, TYfloat, TYfloat): 989 case X(OPmul, TYdouble, TYdouble): 990 case X(OPmul, TYdouble_alias, TYdouble_alias): 991 case X(OPmul, TYldouble, TYldouble): 992 case X(OPmul, TYldouble, TYdouble): 993 case X(OPmul, TYdouble, TYldouble): 994 case X(OPmul, TYifloat, TYifloat): 995 case X(OPmul, TYidouble, TYidouble): 996 case X(OPmul, TYildouble, TYildouble): 997 case X(OPmul, TYfloat, TYifloat): 998 case X(OPmul, TYdouble, TYidouble): 999 case X(OPmul, TYldouble, TYildouble): 1000 case X(OPmul, TYifloat, TYfloat): 1001 case X(OPmul, TYidouble, TYdouble): 1002 case X(OPmul, TYildouble, TYldouble): 1003 op = 1; // FMULP 1004 break; 1005 1006 case X(OPdiv, TYfloat, TYfloat): 1007 case X(OPdiv, TYdouble, TYdouble): 1008 case X(OPdiv, TYdouble_alias, TYdouble_alias): 1009 case X(OPdiv, TYldouble, TYldouble): 1010 case X(OPdiv, TYldouble, TYdouble): 1011 case X(OPdiv, TYdouble, TYldouble): 1012 case 
X(OPdiv, TYifloat, TYifloat): 1013 case X(OPdiv, TYidouble, TYidouble): 1014 case X(OPdiv, TYildouble, TYildouble): 1015 op = 6; // FDIVP 1016 break; 1017 1018 case X(OPmod, TYfloat, TYfloat): 1019 case X(OPmod, TYdouble, TYdouble): 1020 case X(OPmod, TYdouble_alias, TYdouble_alias): 1021 case X(OPmod, TYldouble, TYldouble): 1022 case X(OPmod, TYfloat, TYifloat): 1023 case X(OPmod, TYdouble, TYidouble): 1024 case X(OPmod, TYldouble, TYildouble): 1025 case X(OPmod, TYifloat, TYifloat): 1026 case X(OPmod, TYidouble, TYidouble): 1027 case X(OPmod, TYildouble, TYildouble): 1028 case X(OPmod, TYifloat, TYfloat): 1029 case X(OPmod, TYidouble, TYdouble): 1030 case X(OPmod, TYildouble, TYldouble): 1031 op = cast(uint) -1; 1032 break; 1033 1034 case X(OPeqeq, TYfloat, TYfloat): 1035 case X(OPeqeq, TYdouble, TYdouble): 1036 case X(OPeqeq, TYdouble_alias, TYdouble_alias): 1037 case X(OPeqeq, TYldouble, TYldouble): 1038 case X(OPeqeq, TYifloat, TYifloat): 1039 case X(OPeqeq, TYidouble, TYidouble): 1040 case X(OPeqeq, TYildouble, TYildouble): 1041 { 1042 assert(OTrel(e.Eoper)); 1043 assert((*pretregs & mST0) == 0); 1044 regm_t retregs = mST0; 1045 codelem(cdb,e1,&retregs,false); 1046 note87(e1,0,0); 1047 regm_t resregm = mPSW; 1048 1049 if (rel_exception(e.Eoper) || config.flags4 & CFG4fastfloat) 1050 { 1051 if (e2.Eoper == OPconst && !boolres(e2)) 1052 { 1053 if (NOSAHF) 1054 { 1055 push87(cdb); 1056 cdb.gen2(0xD9,0xEE); // FLDZ 1057 cdb.gen2(0xDF,0xF1); // FCOMIP ST1 1058 pop87(); 1059 } 1060 else 1061 { 1062 cdb.genf2(0xD9,0xE4); // FTST 1063 cg87_87topsw(cdb); 1064 } 1065 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 1066 pop87(); 1067 } 1068 else if (NOSAHF) 1069 { 1070 note87(e1,0,0); 1071 load87(cdb,e2,0,&retregs,e1,-1); 1072 makesure87(cdb,e1,0,1,0); 1073 resregm = 0; 1074 //cdb.genf2(0xD9,0xC8 + 1); // FXCH ST1 1075 cdb.gen2(0xDF,0xF1); // FCOMIP ST1 1076 pop87(); 1077 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 1078 pop87(); 1079 } 1080 else 1081 { 1082 load87(cdb,e2, 0, 
pretregs, e1, 3); // FCOMPP 1083 } 1084 } 1085 else 1086 { 1087 if (e2.Eoper == OPconst && !boolres(e2) && 1088 config.target_cpu < TARGET_80386) 1089 { 1090 regm_t regm = 0; 1091 1092 callclib(cdb,e,CLIB.ftest0,®m,0); 1093 pop87(); 1094 } 1095 else 1096 { 1097 note87(e1,0,0); 1098 load87(cdb,e2,0,&retregs,e1,-1); 1099 makesure87(cdb,e1,0,1,0); 1100 resregm = 0; 1101 if (NOSAHF) 1102 { 1103 cdb.gen2(0xDF,0xE9); // FUCOMIP ST1 1104 pop87(); 1105 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 1106 pop87(); 1107 } 1108 else if (config.target_cpu >= TARGET_80386) 1109 { 1110 cdb.gen2(0xDA,0xE9); // FUCOMPP 1111 cg87_87topsw(cdb); 1112 pop87(); 1113 pop87(); 1114 } 1115 else 1116 // Call a function instead so that exceptions 1117 // are not generated. 1118 callclib(cdb,e,CLIB.fcompp,&resregm,0); 1119 } 1120 } 1121 1122 freenode(e2); 1123 return; 1124 } 1125 1126 case X(OPadd, TYcfloat, TYcfloat): 1127 case X(OPadd, TYcdouble, TYcdouble): 1128 case X(OPadd, TYcldouble, TYcldouble): 1129 case X(OPadd, TYcfloat, TYfloat): 1130 case X(OPadd, TYcdouble, TYdouble): 1131 case X(OPadd, TYcldouble, TYldouble): 1132 case X(OPadd, TYfloat, TYcfloat): 1133 case X(OPadd, TYdouble, TYcdouble): 1134 case X(OPadd, TYldouble, TYcldouble): 1135 goto Lcomplex; 1136 1137 case X(OPadd, TYifloat, TYcfloat): 1138 case X(OPadd, TYidouble, TYcdouble): 1139 case X(OPadd, TYildouble, TYcldouble): 1140 goto Lcomplex2; 1141 1142 case X(OPmin, TYcfloat, TYcfloat): 1143 case X(OPmin, TYcdouble, TYcdouble): 1144 case X(OPmin, TYcldouble, TYcldouble): 1145 case X(OPmin, TYcfloat, TYfloat): 1146 case X(OPmin, TYcdouble, TYdouble): 1147 case X(OPmin, TYcldouble, TYldouble): 1148 case X(OPmin, TYfloat, TYcfloat): 1149 case X(OPmin, TYdouble, TYcdouble): 1150 case X(OPmin, TYldouble, TYcldouble): 1151 goto Lcomplex; 1152 1153 case X(OPmin, TYifloat, TYcfloat): 1154 case X(OPmin, TYidouble, TYcdouble): 1155 case X(OPmin, TYildouble, TYcldouble): 1156 goto Lcomplex2; 1157 1158 case X(OPmul, TYcfloat, TYcfloat): 
1159 case X(OPmul, TYcdouble, TYcdouble): 1160 case X(OPmul, TYcldouble, TYcldouble): 1161 goto Lcomplex; 1162 1163 case X(OPdiv, TYcfloat, TYcfloat): 1164 case X(OPdiv, TYcdouble, TYcdouble): 1165 case X(OPdiv, TYcldouble, TYcldouble): 1166 case X(OPdiv, TYfloat, TYcfloat): 1167 case X(OPdiv, TYdouble, TYcdouble): 1168 case X(OPdiv, TYldouble, TYcldouble): 1169 case X(OPdiv, TYifloat, TYcfloat): 1170 case X(OPdiv, TYidouble, TYcdouble): 1171 case X(OPdiv, TYildouble, TYcldouble): 1172 goto Lcomplex; 1173 1174 case X(OPdiv, TYifloat, TYfloat): 1175 case X(OPdiv, TYidouble, TYdouble): 1176 case X(OPdiv, TYildouble, TYldouble): 1177 op = 6; // FDIVP 1178 break; 1179 1180 Lcomplex: 1181 { 1182 loadComplex(cdb,e1); 1183 loadComplex(cdb,e2); 1184 makesure87(cdb, e1, sz2, 2, 0); 1185 makesure87(cdb, e1, 0, 3, 0); 1186 regm_t retregs = mST01; 1187 if (eoper == OPadd) 1188 { 1189 cdb.genf2(0xDE, 0xC0+2); // FADDP ST(2),ST 1190 cdb.genf2(0xDE, 0xC0+2); // FADDP ST(2),ST 1191 pop87(); 1192 pop87(); 1193 } 1194 else if (eoper == OPmin) 1195 { 1196 cdb.genf2(0xDE, 0xE8+2); // FSUBP ST(2),ST 1197 cdb.genf2(0xDE, 0xE8+2); // FSUBP ST(2),ST 1198 pop87(); 1199 pop87(); 1200 } 1201 else 1202 { 1203 int clib = eoper == OPmul ? 
CLIB.cmul : CLIB.cdiv; 1204 callclib(cdb, e, clib, &retregs, 0); 1205 } 1206 fixresult_complex87(cdb, e, retregs, pretregs); 1207 return; 1208 } 1209 1210 Lcomplex2: 1211 { 1212 regm_t retregs = mST0; 1213 codelem(cdb,e1, &retregs, false); 1214 note87(e1, 0, 0); 1215 loadComplex(cdb,e2); 1216 makesure87(cdb, e1, 0, 2, 0); 1217 retregs = mST01; 1218 if (eoper == OPadd) 1219 { 1220 cdb.genf2(0xDE, 0xC0+2); // FADDP ST(2),ST 1221 } 1222 else if (eoper == OPmin) 1223 { 1224 cdb.genf2(0xDE, 0xE8+2); // FSUBP ST(2),ST 1225 cdb.genf2(0xD9, 0xE0); // FCHS 1226 } 1227 else 1228 assert(0); 1229 pop87(); 1230 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1231 fixresult_complex87(cdb, e, retregs, pretregs); 1232 return; 1233 } 1234 1235 case X(OPeqeq, TYcfloat, TYcfloat): 1236 case X(OPeqeq, TYcdouble, TYcdouble): 1237 case X(OPeqeq, TYcldouble, TYcldouble): 1238 case X(OPeqeq, TYcfloat, TYifloat): 1239 case X(OPeqeq, TYcdouble, TYidouble): 1240 case X(OPeqeq, TYcldouble, TYildouble): 1241 case X(OPeqeq, TYcfloat, TYfloat): 1242 case X(OPeqeq, TYcdouble, TYdouble): 1243 case X(OPeqeq, TYcldouble, TYldouble): 1244 case X(OPeqeq, TYifloat, TYcfloat): 1245 case X(OPeqeq, TYidouble, TYcdouble): 1246 case X(OPeqeq, TYildouble, TYcldouble): 1247 case X(OPeqeq, TYfloat, TYcfloat): 1248 case X(OPeqeq, TYdouble, TYcdouble): 1249 case X(OPeqeq, TYldouble, TYcldouble): 1250 case X(OPeqeq, TYfloat, TYifloat): 1251 case X(OPeqeq, TYdouble, TYidouble): 1252 case X(OPeqeq, TYldouble, TYildouble): 1253 case X(OPeqeq, TYifloat, TYfloat): 1254 case X(OPeqeq, TYidouble, TYdouble): 1255 case X(OPeqeq, TYildouble, TYldouble): 1256 { 1257 loadComplex(cdb,e1); 1258 loadComplex(cdb,e2); 1259 makesure87(cdb, e1, sz2, 2, 0); 1260 makesure87(cdb, e1, 0, 3, 0); 1261 regm_t retregs = 0; 1262 callclib(cdb, e, CLIB.ccmp, &retregs, 0); 1263 return; 1264 } 1265 1266 case X(OPadd, TYfloat, TYifloat): 1267 case X(OPadd, TYdouble, TYidouble): 1268 case X(OPadd, TYldouble, TYildouble): 1269 case X(OPadd, TYifloat, 
TYfloat): 1270 case X(OPadd, TYidouble, TYdouble): 1271 case X(OPadd, TYildouble, TYldouble): 1272 1273 case X(OPmin, TYfloat, TYifloat): 1274 case X(OPmin, TYdouble, TYidouble): 1275 case X(OPmin, TYldouble, TYildouble): 1276 case X(OPmin, TYifloat, TYfloat): 1277 case X(OPmin, TYidouble, TYdouble): 1278 case X(OPmin, TYildouble, TYldouble): 1279 { 1280 regm_t retregs = mST0; 1281 codelem(cdb,e1, &retregs, false); 1282 note87(e1, 0, 0); 1283 codelem(cdb,e2, &retregs, false); 1284 makesure87(cdb, e1, 0, 1, 0); 1285 if (eoper == OPmin) 1286 cdb.genf2(0xD9, 0xE0); // FCHS 1287 if (tyimaginary(e1.Ety)) 1288 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1289 retregs = mST01; 1290 fixresult_complex87(cdb, e, retregs, pretregs); 1291 return; 1292 } 1293 1294 case X(OPadd, TYcfloat, TYifloat): 1295 case X(OPadd, TYcdouble, TYidouble): 1296 case X(OPadd, TYcldouble, TYildouble): 1297 op = 0; 1298 goto Lci; 1299 1300 case X(OPmin, TYcfloat, TYifloat): 1301 case X(OPmin, TYcdouble, TYidouble): 1302 case X(OPmin, TYcldouble, TYildouble): 1303 op = 4; 1304 goto Lci; 1305 1306 Lci: 1307 { 1308 loadComplex(cdb,e1); 1309 regm_t retregs = mST0; 1310 load87(cdb,e2,sz2,&retregs,e1,op); 1311 freenode(e2); 1312 retregs = mST01; 1313 makesure87(cdb, e1,0,1,0); 1314 fixresult_complex87(cdb,e, retregs, pretregs); 1315 return; 1316 } 1317 1318 case X(OPmul, TYcfloat, TYfloat): 1319 case X(OPmul, TYcdouble, TYdouble): 1320 case X(OPmul, TYcldouble, TYldouble): 1321 imaginary = false; 1322 goto Lcmul; 1323 1324 case X(OPmul, TYcfloat, TYifloat): 1325 case X(OPmul, TYcdouble, TYidouble): 1326 case X(OPmul, TYcldouble, TYildouble): 1327 imaginary = true; 1328 Lcmul: 1329 { 1330 loadComplex(cdb,e1); 1331 if (imaginary) 1332 { 1333 cdb.genf2(0xD9, 0xE0); // FCHS 1334 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1335 if (elemisone(e2)) 1336 { 1337 freenode(e2); 1338 fixresult_complex87(cdb, e, mST01, pretregs); 1339 return; 1340 } 1341 } 1342 regm_t retregs = mST0; 1343 codelem(cdb,e2, &retregs, false); 
1344 makesure87(cdb, e1, sz2, 1, 0); 1345 makesure87(cdb, e1, 0, 2, 0); 1346 cdb.genf2(0xDC,0xC8 + 2); // FMUL ST(2), ST 1347 cdb.genf2(0xDE,0xC8 + 1); // FMULP ST(1), ST 1348 pop87(); 1349 fixresult_complex87(cdb, e, mST01, pretregs); 1350 return; 1351 } 1352 1353 case X(OPmul, TYfloat, TYcfloat): 1354 case X(OPmul, TYdouble, TYcdouble): 1355 case X(OPmul, TYldouble, TYcldouble): 1356 imaginary = false; 1357 goto Lcmul2; 1358 1359 case X(OPmul, TYifloat, TYcfloat): 1360 case X(OPmul, TYidouble, TYcdouble): 1361 case X(OPmul, TYildouble, TYcldouble): 1362 imaginary = true; 1363 Lcmul2: 1364 { 1365 regm_t retregs = mST0; 1366 codelem(cdb,e1, &retregs, false); 1367 note87(e1, 0, 0); 1368 loadComplex(cdb,e2); 1369 makesure87(cdb, e1, 0, 2, 0); 1370 cdb.genf2(0xD9, imaginary ? 0xE0 : 0xC8 + 1); // FCHS / FXCH ST(1) 1371 cdb.genf2(0xD9,0xC8 + 2); // FXCH ST(2) 1372 cdb.genf2(0xDC,0xC8 + 2); // FMUL ST(2), ST 1373 cdb.genf2(0xDE,0xC8 + 1); // FMULP ST(1), ST 1374 pop87(); 1375 fixresult_complex87(cdb, e, mST01, pretregs); 1376 return; 1377 } 1378 1379 case X(OPdiv, TYcfloat, TYfloat): 1380 case X(OPdiv, TYcdouble, TYdouble): 1381 case X(OPdiv, TYcldouble, TYldouble): 1382 { 1383 loadComplex(cdb,e1); 1384 regm_t retregs = mST0; 1385 codelem(cdb,e2, &retregs, false); 1386 makesure87(cdb, e1, sz2, 1, 0); 1387 makesure87(cdb, e1, 0, 2, 0); 1388 cdb.genf2(0xDC,0xF8 + 2); // FDIV ST(2), ST 1389 cdb.genf2(0xDE,0xF8 + 1); // FDIVP ST(1), ST 1390 pop87(); 1391 fixresult_complex87(cdb, e, mST01, pretregs); 1392 return; 1393 } 1394 1395 case X(OPdiv, TYcfloat, TYifloat): 1396 case X(OPdiv, TYcdouble, TYidouble): 1397 case X(OPdiv, TYcldouble, TYildouble): 1398 { 1399 loadComplex(cdb,e1); 1400 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1401 xchg87(0, 1); 1402 cdb.genf2(0xD9, 0xE0); // FCHS 1403 regm_t retregs = mST0; 1404 codelem(cdb,e2, &retregs, false); 1405 makesure87(cdb, e1, 0, 1, 0); 1406 makesure87(cdb, e1, sz2, 2, 0); 1407 cdb.genf2(0xDC,0xF8 + 2); // FDIV ST(2), ST 1408 
cdb.genf2(0xDE,0xF8 + 1); // FDIVP ST(1), ST 1409 pop87(); 1410 fixresult_complex87(cdb, e, mST01, pretregs); 1411 return; 1412 } 1413 1414 case X(OPmod, TYcfloat, TYfloat): 1415 case X(OPmod, TYcdouble, TYdouble): 1416 case X(OPmod, TYcldouble, TYldouble): 1417 case X(OPmod, TYcfloat, TYifloat): 1418 case X(OPmod, TYcdouble, TYidouble): 1419 case X(OPmod, TYcldouble, TYildouble): 1420 { 1421 /* 1422 fld E1.re 1423 fld E1.im 1424 fld E2 1425 fxch ST(1) 1426 FM1: fprem 1427 fstsw word ptr sw 1428 fwait 1429 mov AH, byte ptr sw+1 1430 jp FM1 1431 fxch ST(2) 1432 FM2: fprem 1433 fstsw word ptr sw 1434 fwait 1435 mov AH, byte ptr sw+1 1436 jp FM2 1437 fstp ST(1) 1438 fxch ST(1) 1439 */ 1440 loadComplex(cdb,e1); 1441 regm_t retregs = mST0; 1442 codelem(cdb,e2, &retregs, false); 1443 makesure87(cdb, e1, sz2, 1, 0); 1444 makesure87(cdb, e1, 0, 2, 0); 1445 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1446 1447 cdb.gen2(0xD9, 0xF8); // FPREM 1448 code *cfm1 = cdb.last(); 1449 genjmpifC2(cdb, cfm1); // JC2 FM1 1450 cdb.genf2(0xD9, 0xC8 + 2); // FXCH ST(2) 1451 1452 cdb.gen2(0xD9, 0xF8); // FPREM 1453 code *cfm2 = cdb.last(); 1454 1455 genjmpifC2(cdb, cfm2); // JC2 FM2 1456 cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1) 1457 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 1458 1459 pop87(); 1460 fixresult_complex87(cdb, e, mST01, pretregs); 1461 return; 1462 } 1463 1464 default: 1465 1466 debug 1467 elem_print(e); 1468 1469 assert(0); 1470 } 1471 1472 int reverse = 0; 1473 int e2oper = e2.Eoper; 1474 1475 /* Move double-sized operand into the second position if there's a chance 1476 * it will allow combining a load with an operation (DMD Bugzilla 2905) 1477 */ 1478 if ( ((tybasic(e1.Ety) == TYdouble) 1479 && ((e1.Eoper == OPvar) || (e1.Eoper == OPconst)) 1480 && (tybasic(e2.Ety) != TYdouble)) || 1481 (e1.Eoper == OPconst) || 1482 (e1.Eoper == OPvar && 1483 ((e1.Ety & (mTYconst | mTYimmutable) && !OTleaf(e2oper)) || 1484 (e2oper == OPd_f && 1485 (e2.EV.E1.Eoper == OPs32_d || e2.EV.E1.Eoper == 
OPs64_d || e2.EV.E1.Eoper == OPs16_d) && 1486 e2.EV.E1.EV.E1.Eoper == OPvar 1487 ) || 1488 ((e2oper == OPs32_d || e2oper == OPs64_d || e2oper == OPs16_d) && 1489 e2.EV.E1.Eoper == OPvar 1490 ) 1491 ) 1492 ) 1493 ) 1494 { // Reverse order of evaluation 1495 e1 = e.EV.E2; 1496 e2 = e.EV.E1; 1497 op = oprev[op + 1]; 1498 reverse ^= 1; 1499 } 1500 1501 regm_t retregs1 = mST0; 1502 codelem(cdb,e1,&retregs1,false); 1503 note87(e1,0,0); 1504 1505 if (config.flags4 & CFG4fdivcall && e.Eoper == OPdiv) 1506 { 1507 regm_t retregs = mST0; 1508 load87(cdb,e2,0,&retregs,e1,-1); 1509 makesure87(cdb, e1,0,1,0); 1510 if (op == 7) // if reverse divide 1511 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1512 callclib(cdb,e,CLIB.fdiv87,&retregs,0); 1513 pop87(); 1514 regm_t resregm = mST0; 1515 freenode(e2); 1516 fixresult87(cdb,e,resregm,pretregs); 1517 } 1518 else if (e.Eoper == OPmod) 1519 { 1520 /* 1521 * fld tbyte ptr y 1522 * fld tbyte ptr x // ST = x, ST1 = y 1523 * FM1: // We don't use fprem1 because for some inexplicable 1524 * // reason we get -5 when we do _modulo(15, 10) 1525 * fprem // ST = ST % ST1 1526 * fstsw word ptr sw 1527 * fwait 1528 * mov AH,byte ptr sw+1 // get msb of status word in AH 1529 * sahf // transfer to flags 1530 * jp FM1 // continue till ST < ST1 1531 * fstp ST(1) // leave remainder on stack 1532 */ 1533 regm_t retregs = mST0; 1534 load87(cdb,e2,0,&retregs,e1,-1); 1535 makesure87(cdb,e1,0,1,0); // now have x,y on stack; need y,x 1536 if (!reverse) // if not reverse modulo 1537 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 1538 1539 cdb.gen2(0xD9, 0xF8); // FM1: FPREM 1540 code *cfm1 = cdb.last(); 1541 genjmpifC2(cdb, cfm1); // JC2 FM1 1542 cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1) 1543 1544 pop87(); 1545 freenode(e2); 1546 fixresult87(cdb,e,mST0,pretregs); 1547 } 1548 else 1549 { 1550 load87(cdb,e2,0,pretregs,e1,op); 1551 freenode(e2); 1552 } 1553 if (*pretregs & mST0) 1554 note87(e,0,0); 1555 //printf("orth87(-e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 
}

/*****************************
 * Load e into ST01.
 *
 * Real operands get a zero imaginary part and imaginary operands get a
 * zero real part (FLDZ), so that all three type families end up occupying
 * two x87 stack slots.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = real, imaginary or complex floating point expression to load
 */

private void loadComplex(ref CodeBuilder cdb,elem *e)
{
    regm_t retregs;

    int sz = tysize(e.Ety);
    switch (tybasic(e.Ety))
    {
        case TYfloat:
        case TYdouble:
        case TYldouble:
            retregs = mST0;
            codelem(cdb,e,&retregs,false);
            // Convert to complex with a 0 for the imaginary part
            push87(cdb);
            cdb.gen2(0xD9,0xEE);              // FLDZ
            break;

        case TYifloat:
        case TYidouble:
        case TYildouble:
            // Convert to complex with a 0 for the real part
            push87(cdb);
            cdb.gen2(0xD9,0xEE);              // FLDZ
            retregs = mST0;
            codelem(cdb,e,&retregs,false);
            break;

        case TYcfloat:
        case TYcdouble:
        case TYcldouble:
            sz /= 2;                          // size of one component
            retregs = mST01;
            codelem(cdb,e,&retregs,false);
            break;

        default:
            assert(0);
    }
    // Record both halves: offset 0 with index 1, offset sz with index 0
    // (presumably the ST(i) slot each half lives in - see note87())
    note87(e, 0, 1);
    note87(e, sz, 0);
}

/*************************
 * If op == -1, load expression e into ST0.
 * else compute (eleft op e), eleft is in ST0.
 * Must follow same logic as cmporder87();
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = operand expression to load or combine with eleft
 *      eoffset = offset passed along with eleft to note87()/makesure87()
 *      pretregs = where the caller wants the result; handed to fixresult87()
 *      eleft = left operand, already in ST0 when op != -1
 *      op = -1 for a plain load, otherwise the x87 operation number that is
 *           placed in the reg field of the ModRM byte (3 is FCOMP, 4 and 6
 *           are subtract and divide)
 */

void load87(ref CodeBuilder cdb,elem *e,uint eoffset,regm_t *pretregs,elem *eleft,OPER op)
{
    code cs;
    regm_t retregs;
    reg_t reg;
    uint mf1;
    ubyte ldop;
    int i;

    if (NDPP)
        printf("+load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);

    assert(!(NOSAHF && op == 3));
    elem_debug(e);
    if (ADDFWAIT())
        cs.Iflags = CFwait;
    else
        cs.Iflags = 0;
    cs.Irex = 0;
    OPER opr = oprev[op + 1];               // operand-reversed form of op, used by the FopRP encodings
    tym_t ty = tybasic(e.Ety);
    uint mf = (ty == TYfloat || ty == TYifloat || ty == TYcfloat) ? MFfloat : MFdouble;
    // 80 bit operands cannot use the Fop-from-memory forms below,
    // so they are loaded onto the stack first
    if ((ty == TYldouble || ty == TYildouble) &&
        op != -1 && e.Eoper != OPd_ld)
        goto Ldefault;
L5:
    switch (e.Eoper)
    {
        case OPcomma:
            docommas(cdb,&e);
            goto L5;

        case OPvar:
            notreg(e);
            goto L2;

        case OPind:
        L2:
            if (op != -1)
            {
                if (e.Ecount && e.Ecount != e.Ecomsub &&
                    (i = cse_get(e, 0)) >= 0)
                {
                    // Second byte of the "Fop ST(i)" encodings, indexed by op
                    immutable ubyte[8] b2 = [0xC0,0xC8,0xD0,0xD8,0xE0,0xE8,0xF0,0xF8];

                    cdb.genf2(0xD8,b2[op] + i);        // Fop ST(i)
                }
                else
                {
                    getlvalue87(cdb,&cs,e,0);
                    makesure87(cdb,eleft,eoffset,0,0);
                    cs.Iop = ESC(mf,0);
                    cs.Irm |= modregrm(0,op,0);
                    cdb.gen(&cs);
                }
            }
            else
            {
                push87(cdb);
                switch (ty)
                {
                    case TYfloat:
                    case TYdouble:
                    case TYifloat:
                    case TYidouble:
                    case TYcfloat:
                    case TYcdouble:
                    case TYdouble_alias:
                        loadea(cdb,e,&cs,ESC(mf,1),0,0,0,0); // FLD var
                        break;
                    case TYldouble:
                    case TYildouble:
                    case TYcldouble:
                        loadea(cdb,e,&cs,0xDB,5,0,0,0);      // FLD var
                        break;
                    default:
                        printf("ty = x%x\n", ty);
                        assert(0);
                }
                note87(e,0,0);
            }
            break;

        case OPd_f:
        case OPf_d:
        case OPd_ld:
            mf1 = (tybasic(e.EV.E1.Ety) == TYfloat || tybasic(e.EV.E1.Ety) == TYifloat)
                    ? MFfloat : MFdouble;
            if (op != -1 && global87.stackused)
                note87(eleft,eoffset,0);    // don't trash this value
            if (e.EV.E1.Eoper == OPvar || e.EV.E1.Eoper == OPind)
            {
                static if (1)
                {
                L4:
                    // Also reached from L6 (OPs16_d / OPs32_d) with mf1 set accordingly
                    getlvalue87(cdb,&cs,e.EV.E1,0);
                    cs.Iop = ESC(mf1,0);
                    if (op != -1)
                    {
                        cs.Irm |= modregrm(0,op,0);
                        makesure87(cdb,eleft,eoffset,0,0);
                    }
                    else
                    {
                        cs.Iop |= 1;
                        push87(cdb);
                    }
                    cdb.gen(&cs);                     // FLD / Fop
                }
                else
                {
                    loadea(cdb,e.EV.E1,&cs,ESC(mf1,1),0,0,0,0); /* FLD e.EV.E1 */
                }

                // Variable cannot be put into a register anymore
                if (e.EV.E1.Eoper == OPvar)
                    notreg(e.EV.E1);
                freenode(e.EV.E1);
            }
            else
            {
                retregs = mST0;
                codelem(cdb,e.EV.E1,&retregs,false);
                if (op != -1)
                {
                    makesure87(cdb,eleft,eoffset,1,0);
                    cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
                    pop87();
                }
            }
            break;

        case OPs64_d:
            if (e.EV.E1.Eoper == OPvar ||
                (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
            {
                getlvalue87(cdb,&cs,e.EV.E1,0);
                cs.Iop = 0xDF;
                push87(cdb);
                cs.Irm |= modregrm(0,5,0);
                cdb.gen(&cs);                     // FILD m64
                // Variable cannot be put into a register anymore
                if (e.EV.E1.Eoper == OPvar)
                    notreg(e.EV.E1);
                freenode(e.EV.E1);
            }
            else if (I64)
            {
                retregs = ALLREGS;
                codelem(cdb,e.EV.E1,&retregs,false);
                reg = findreg(retregs);
                cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
                code_orrex(cdb.last(), REX_W);
                push87(cdb);
                cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
            }
            else
            {
                retregs = ALLREGS;
                codelem(cdb,e.EV.E1,&retregs,false);
                reg = findreglsw(retregs);
                cdb.genfltreg(STO,reg,0);         // MOV floatreg,reglsw
                reg = findregmsw(retregs);
                cdb.genfltreg(STO,reg,4);         // MOV floatreg+4,regmsw
                push87(cdb);
                cdb.genfltreg(0xDF,5,0);          // FILD long long ptr floatreg
            }
            if (op != -1)
            {
                makesure87(cdb,eleft,eoffset,1,0);
                cdb.genf2(0xDE,modregrm(3,opr,1)); // FopRP
                pop87();
            }
            break;

        case OPconst:
            ldop = loadconst(e, 0);
            if (ldop)
            {
                push87(cdb);
                cdb.genf2(0xD9,ldop);          // FLDx
                if (op != -1)
                {
                    cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
                    pop87();
                }
            }
            else
            {
                assert(0);
            }
            break;

        case OPu16_d:
        {
            /* This opcode should never be generated        */
            /* (probably shouldn't be for 16 bit code too)  */
            assert(!I32);

            if (op != -1)
                note87(eleft,eoffset,0);    // don't trash this value
            retregs = ALLREGS & mLSW;
            codelem(cdb,e.EV.E1,&retregs,false);
            // Pass the address of reg so regwithvalue() can report the register it chose
            regwithvalue(cdb,ALLREGS & mMSW,0,&reg,0);  // 0-extend
            retregs |= mask(reg);
            mf1 = MFlong;
            goto L3;
        }

        case OPs16_d:       mf1 = MFword;   goto L6;
        case OPs32_d:       mf1 = MFlong;   goto L6;
        L6:
            if (e.Ecount)
                goto Ldefault;
            if (op != -1)
                note87(eleft,eoffset,0);    // don't trash this value
            if (e.EV.E1.Eoper == OPvar ||
                (e.EV.E1.Eoper == OPind && e.EV.E1.Ecount == 0))
            {
                goto L4;
            }
            else
            {
                retregs = ALLREGS;
                codelem(cdb,e.EV.E1,&retregs,false);
            L3:
                if (I16 && e.Eoper != OPs16_d)
                {
                    /* MOV floatreg+2,reg   */
                    reg = findregmsw(retregs);
                    cdb.genfltreg(STO,reg,REGSIZE);
                    retregs &= mLSW;
                }
                reg = findreg(retregs);
                cdb.genfltreg(STO,reg,0);         // MOV floatreg,reg
                if (op != -1)
                {
                    makesure87(cdb,eleft,eoffset,0,0);
                    cdb.genfltreg(ESC(mf1,0),op,0);   // Fop floatreg
                }
                else
                {
                    /* FLD long ptr floatreg        */
                    push87(cdb);
                    cdb.genfltreg(ESC(mf1,1),0,0);
                }
            }
            break;
        default:
        Ldefault:
            retregs = mST0;
            codelem(cdb,e,&retregs,2);

            if (op != -1)
            {
                makesure87(cdb,eleft,eoffset,1,(op == 0 || op == 1));
                pop87();
                if (op == 4 || op == 6)             // sub or div
                {
                    // If makesure87() just emitted an FXCH ST(1), drop it and
                    // use the non-reversed operation instead
                    code *cl = cdb.last();
                    if (cl && cl.Iop == 0xD9 && cl.Irm == 0xC9)   // FXCH ST(1)
                    {   cl.Iop = NOP;
                        opr = op;                   // reverse operands
                    }
                }
                cdb.genf2(0xDE,modregrm(3,opr,1));        // FopRP
            }
            break;
    }
    if (op == 3)                          // FCOMP
    {   pop87();                          // extra pop was done
        cg87_87topsw(cdb);
    }
    fixresult87(cdb,e,((op == 3) ? mPSW : mST0),pretregs);
    if (NDPP)
        printf("-load87(e=%p, eoffset=%d, *pretregs=%s, eleft=%p, op=%d, stackused = %d)\n",e,eoffset,regm_str(*pretregs),eleft,op,global87.stackused);
}

/********************************
 * Determine if a compare is to be done forwards (return 0)
 * or backwards (return 1).
 * Must follow same logic as load87().
 *
 * Params:
 *      e = operand expression; OPcomma nodes are skipped via their E2
 * Returns:
 *      0 for forwards, 1 for backwards
 */

int cmporder87(elem *e)
{
    //printf("cmporder87(%p)\n",e);
  L1:
    switch (e.Eoper)
    {
        case OPcomma:
            e = e.EV.E2;
            goto L1;

        case OPd_f:
        case OPf_d:
        case OPd_ld:
            if (e.EV.E1.Eoper == OPvar || e.EV.E1.Eoper == OPind)
                goto ret0;
            else
                goto ret1;

        case OPconst:
            if (loadconst(e, 0) || tybasic(e.Ety) == TYldouble
                                || tybasic(e.Ety) == TYildouble)
            {
                //printf("ret 1, loadconst(e) = %d\n", loadconst(e));
                goto ret1;
            }
            goto ret0;

        case OPvar:
        case OPind:
            if (tybasic(e.Ety) == TYldouble ||
                tybasic(e.Ety) == TYildouble)
                goto ret1;
            goto ret0;

        case OPu16_d:
        case OPs16_d:
        case OPs32_d:
            goto ret0;

        case OPs64_d:
            goto ret1;

        default:
            goto ret1;
    }

ret1:
    return 1;

ret0:
    return 0;
}

/*******************************
 * Perform an assignment to a long double/double/float.
 */

void eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // cdb:      code builder the generated instructions are appended to
    // e:        the OPeq node; E1 is the lvalue, E2 the value to store
    // pretregs: where the caller wants the result (tested for mPSW, mST0,
    //           ALLREGS, mBP and XMMREGS below)
    code cs;
    opcode_t op1;       // store opcode byte for the lvalue's type
    uint op2;           // reg field of the ModRM byte (2 = FST, 3 or 7 = FSTP)

    //printf("+eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);
    regm_t retregs = mST0 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);    // evaluate the rvalue into ST0
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    switch (ty1)
    {
        case TYdouble_alias:
        case TYidouble:
        case TYdouble:      op1 = ESC(MFdouble,1);  op2 = 3; break;

        case TYifloat:
        case TYfloat:       op1 = ESC(MFfloat,1);   op2 = 3; break;

        case TYildouble:
        case TYldouble:     op1 = 0xDB;             op2 = 7; break;

        default:
            assert(0);
    }
    if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) // if want result on stack too
    {
        if (ty1 == TYldouble || ty1 == TYildouble)
        {
            // The 80 bit store used here always pops (op2 stays 7),
            // so duplicate ST0 before storing
            push87(cdb);
            cdb.genf2(0xD9,0xC0);           // FLD ST(0)
            pop87();
        }
        else
            op2 = 2;                        // FST e.EV.E1
    }
    else
    {   // FSTP e.EV.E1
        pop87();
    }

    static if (0)
    {
        // Doesn't work if ST(0) gets saved to the stack by getlvalue()
        loadea(cdb,e.EV.E1,&cs,op1,op2,0,0,0);
    }
    else
    {
        cs.Irex = 0;
        cs.Iflags = 0;
        cs.Iop = op1;
        if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) // if want result on stack too
        {   // Make sure it's still there
            elem *e2 = e.EV.E2;
            while (e2.Eoper == OPcomma)
                e2 = e2.EV.E2;
            note87(e2,0,0);
            getlvalue87(cdb, &cs, e.EV.E1, 0);
            makesure87(cdb,e2,0,0,1);
        }
        else
        {
            getlvalue87(cdb, &cs, e.EV.E1, 0);
        }
        cs.Irm |= modregrm(0,op2,0);            // OR in reg field
        cdb.gen(&cs);
        if (tysize(TYldouble) == 12)
        {
            /* This deals with the fact that 10 byte reals really
             * occupy 12 bytes by zeroing the extra 2 bytes.
             */
            if (op1 == 0xDB)
            {
                cs.Iop = 0xC7;                      // MOV EA+10,0
                NEWREG(cs.Irm, 0);
                cs.IEV1.Voffset += 10;
                cs.IFL2 = FLconst;
                cs.IEV2.Vint = 0;
                cs.Iflags |= CFopsize;
                cdb.gen(&cs);
            }
        }
        else if (tysize(TYldouble) == 16)
        {
            /* This deals with the fact that 10 byte reals really
             * occupy 16 bytes by zeroing the extra 6 bytes.
             */
            if (op1 == 0xDB)
            {
                cs.Irex &= ~REX_W;
                cs.Iop = 0xC7;                      // MOV EA+10,0
                NEWREG(cs.Irm, 0);
                cs.IEV1.Voffset += 10;
                cs.IFL2 = FLconst;
                cs.IEV2.Vint = 0;
                cs.Iflags |= CFopsize;
                cdb.gen(&cs);

                // Same MOV again at EA+12 without the operand size prefix
                cs.IEV1.Voffset += 2;
                cs.Iflags &= ~CFopsize;
                cdb.gen(&cs);
            }
        }
    }
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult87(cdb,e,mST0 | mPSW,pretregs);
}

/*******************************
 * Perform an assignment to a complex long double/double/float
 * (TYcldouble, TYcdouble or TYcfloat lvalue).
 *
 * The value is evaluated into ST01 and stored as two components,
 * the one at offset sz first and then the one at offset 0.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the OPeq node; E1 is the lvalue, E2 the value to store
 *      pretregs = where the caller wants the result
 */

void complex_eq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    opcode_t op1;       // store opcode byte for the component type
    uint op2;           // reg field of the ModRM byte (2 = FST, 3 or 7 = FSTP)
    uint sz;            // size of one component
    int fxch = 0;       // set when FXCH ST(1) must bracket the second store

    //printf("complex_eq87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    assert(e.Eoper == OPeq);
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    cs.Irex = 0;
    regm_t retregs = mST01 | (*pretregs & mPSW);
    codelem(cdb,e.EV.E2,&retregs,false);
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    switch (ty1)
    {
        case TYcdouble:     op1 = ESC(MFdouble,1);  op2 = 3; break;
        case TYcfloat:      op1 = ESC(MFfloat,1);   op2 = 3; break;
        case TYcldouble:    op1 = 0xDB;             op2 = 7; break;
        default:
            assert(0);
    }
    if (*pretregs & (mST01 | mXMM0 | mXMM1))  // if want result on stack too
    {
        if (ty1 == TYcldouble)
        {
            // The 80 bit store always pops, so duplicate both components first
            push87(cdb);
            push87(cdb);
            cdb.genf2(0xD9,0xC0 + 1);       // FLD ST(1)
            cdb.genf2(0xD9,0xC0 + 1);       // FLD ST(1)
            pop87();
            pop87();
        }
        else
        {   op2 = 2;                        // FST e.EV.E1
            fxch = 1;
        }
    }
    else
    {   // FSTP e.EV.E1
        pop87();
        pop87();
    }
    sz = tysize(ty1) / 2;
    if (*pretregs & (mST01 | mXMM0 | mXMM1))
    {
        cs.Iflags = 0;
        cs.Irex = 0;
        cs.Iop = op1;
        getlvalue87(cdb, &cs, e.EV.E1, 0);
        cs.IEV1.Voffset += sz;
        cs.Irm |= modregrm(0, op2, 0);
        makesure87(cdb,e.EV.E2, sz, 0, 0);
        cdb.gen(&cs);
        genfwait(cdb);
        makesure87(cdb,e.EV.E2,  0, 1, 0);
    }
    else
    {
        loadea(cdb,e.EV.E1,&cs,op1,op2,sz,0,0);
        genfwait(cdb);
    }
    if (fxch)
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    cs.IEV1.Voffset -= sz;              // back to the component at offset 0
    cdb.gen(&cs);
    if (fxch)
        cdb.genf2(0xD9,0xC8 + 1);       // FXCH ST(1)
    if (tysize(TYldouble) == 12)
    {
        // Zero the 2 padding bytes after each 10 byte component
        if (op1 == 0xDB)
        {
            cs.Iop = 0xC7;              // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);
            cs.IEV1.Voffset += 12;
            cdb.gen(&cs);               // MOV EA+22,0
        }
    }
    if (tysize(TYldouble) == 16)
    {
        // Zero the 6 padding bytes after each 10 byte component,
        // alternating a MOV with and without the operand size prefix
        if (op1 == 0xDB)
        {
            cs.Iop = 0xC7;              // MOV EA+10,0
            NEWREG(cs.Irm, 0);
            cs.IEV1.Voffset += 10;
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = 0;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 2;
            cs.Iflags &= ~CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 14;
            cs.Iflags |= CFopsize;
            cdb.gen(&cs);

            cs.IEV1.Voffset += 2;
            cs.Iflags &= ~CFopsize;
            cdb.gen(&cs);
        }
    }
    genfwait(cdb);
    freenode(e.EV.E1);
    fixresult_complex87(cdb, e,mST01 | mPSW,pretregs);
}

/*******************************
 * Perform an assignment while converting to integral type,
 * i.e. handle (e1 = (int) e2)
 *
 * The store is bracketed by two genrnd() calls: round-to-zero before it,
 * round-to-nearest after it.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the OPeq node; E2 must be OPd_s16, OPd_s32, OPd_u16 or OPd_s64
 *      pretregs = must be empty (asserted), no result is returned
 */

private void cnvteq87(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    opcode_t op1;       // integer store opcode byte
    uint op2;           // reg field of the ModRM byte

    assert(e.Eoper == OPeq);
    assert(!*pretregs);
    regm_t retregs = mST0;
    elem_debug(e.EV.E2);
    codelem(cdb,e.EV.E2.EV.E1,&retregs,false);  // operand of the conversion into ST0

    switch (e.EV.E2.Eoper)
    {   case OPd_s16:
            op1 = ESC(MFword,1);
            op2 = 3;
            break;
        case OPd_s32:
        case OPd_u16:
            op1 = ESC(MFlong,1);
            op2 = 3;
            break;
        case OPd_s64:
            op1 = 0xDF;
            op2 = 7;
            break;
        default:
            assert(0);
    }
    freenode(e.EV.E2);

    genfwait(cdb);
    genrnd(cdb, CW_roundto0);               // FLDCW roundto0

    pop87();
    cs.Iflags = ADDFWAIT() ? CFwait : 0;
    if (e.EV.E1.Eoper == OPvar)
        notreg(e.EV.E1);                    // cannot be put in register anymore
    loadea(cdb,e.EV.E1,&cs,op1,op2,0,0,0);

    genfwait(cdb);
    genrnd(cdb, CW_roundtonearest);         // FLDCW roundtonearest

    freenode(e.EV.E1);
}

/**********************************
 * Perform +=, -=, *= and /= for doubles.
2232 */ 2233 2234 void opass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2235 { 2236 code cs; 2237 uint op; 2238 opcode_t opld; 2239 opcode_t op1; 2240 uint op2; 2241 tym_t ty1 = tybasic(e.EV.E1.Ety); 2242 2243 switch (ty1) 2244 { 2245 case TYdouble_alias: 2246 case TYidouble: 2247 case TYdouble: op1 = ESC(MFdouble,1); op2 = 3; break; 2248 case TYifloat: 2249 case TYfloat: op1 = ESC(MFfloat,1); op2 = 3; break; 2250 case TYildouble: 2251 case TYldouble: op1 = 0xDB; op2 = 7; break; 2252 2253 case TYcfloat: 2254 case TYcdouble: 2255 case TYcldouble: 2256 if (e.Eoper == OPmodass) 2257 opmod_complex87(cdb, e, pretregs); 2258 else 2259 opass_complex87(cdb, e, pretregs); 2260 return; 2261 2262 default: 2263 assert(0); 2264 } 2265 switch (e.Eoper) 2266 { 2267 case OPpostinc: 2268 case OPaddass: op = 0 << 3; opld = 0xC1; break; // FADD 2269 case OPpostdec: 2270 case OPminass: op = 5 << 3; opld = 0xE1; /*0xE9;*/ break; // FSUBR 2271 case OPmulass: op = 1 << 3; opld = 0xC9; break; // FMUL 2272 case OPdivass: op = 7 << 3; opld = 0xF1; break; // FDIVR 2273 case OPmodass: break; 2274 default: assert(0); 2275 } 2276 regm_t retregs = mST0; 2277 codelem(cdb,e.EV.E2,&retregs,false); // evaluate rvalue 2278 note87(e.EV.E2,0,0); 2279 getlvalue87(cdb,&cs,e.EV.E1,e.Eoper==OPmodass?mAX:0); 2280 makesure87(cdb,e.EV.E2,0,0,0); 2281 if (config.flags4 & CFG4fdivcall && e.Eoper == OPdivass) 2282 { 2283 push87(cdb); 2284 cs.Iop = op1; 2285 if (ty1 == TYldouble || ty1 == TYildouble) 2286 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 
2287 cdb.gen(&cs); 2288 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2289 callclib(cdb,e,CLIB.fdiv87,&retregs,0); 2290 pop87(); 2291 } 2292 else if (e.Eoper == OPmodass) 2293 { 2294 /* 2295 * fld tbyte ptr y 2296 * fld tbyte ptr x // ST = x, ST1 = y 2297 * FM1: // We don't use fprem1 because for some inexplicable 2298 * // reason we get -5 when we do _modulo(15, 10) 2299 * fprem // ST = ST % ST1 2300 * fstsw word ptr sw 2301 * fwait 2302 * mov AH,byte ptr sw+1 // get msb of status word in AH 2303 * sahf // transfer to flags 2304 * jp FM1 // continue till ST < ST1 2305 * fstp ST(1) // leave remainder on stack 2306 */ 2307 code *c1; 2308 2309 push87(cdb); 2310 cs.Iop = op1; 2311 if (ty1 == TYldouble || ty1 == TYildouble) 2312 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2313 cdb.gen(&cs); // FLD e.EV.E1 2314 2315 cdb.gen2(0xD9, 0xF8); // FPREM 2316 code *cfm1 = cdb.last(); 2317 genjmpifC2(cdb, cfm1); // JC2 FM1 2318 cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1) 2319 2320 pop87(); 2321 } 2322 else if (ty1 == TYldouble || ty1 == TYildouble) 2323 { 2324 push87(cdb); 2325 cs.Iop = op1; 2326 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 
2327 cdb.gen(&cs); // FLD e.EV.E1 2328 cdb.genf2(0xDE,opld); // FopP ST(1) 2329 pop87(); 2330 } 2331 else 2332 { 2333 cs.Iop = op1 & ~1; 2334 cs.Irm |= op; 2335 cdb.gen(&cs); // Fop e.EV.E1 2336 } 2337 if (*pretregs & mPSW) 2338 genftst(cdb,e,0); // FTST ST0 2339 // if want result in registers 2340 if (*pretregs & (mST0 | ALLREGS | mBP)) 2341 { 2342 if (ty1 == TYldouble || ty1 == TYildouble) 2343 { 2344 push87(cdb); 2345 cdb.genf2(0xD9,0xC0); // FLD ST(0) 2346 pop87(); 2347 } 2348 else 2349 op2 = 2; // FST e.EV.E1 2350 } 2351 else 2352 { // FSTP 2353 pop87(); 2354 } 2355 cs.Iop = op1; 2356 NEWREG(cs.Irm,op2); // FSTx e.EV.E1 2357 freenode(e.EV.E1); 2358 cdb.gen(&cs); 2359 genfwait(cdb); 2360 fixresult87(cdb,e,mST0 | mPSW,pretregs); 2361 } 2362 2363 /*********************************** 2364 * Perform %= where E1 is complex and E2 is real or imaginary. 2365 */ 2366 2367 private void opmod_complex87(ref CodeBuilder cdb, elem *e,regm_t *pretregs) 2368 { 2369 2370 /* fld E2 2371 fld E1.re 2372 FM1: fprem 2373 fstsw word ptr sw 2374 fwait 2375 mov AH, byte ptr sw+1 2376 jp FM1 2377 fxch ST(1) 2378 fld E1.im 2379 FM2: fprem 2380 fstsw word ptr sw 2381 fwait 2382 mov AH, byte ptr sw+1 2383 jp FM2 2384 fstp ST(1) 2385 */ 2386 2387 code cs; 2388 2389 tym_t ty1 = tybasic(e.EV.E1.Ety); 2390 uint sz2 = _tysize[ty1] / 2; 2391 2392 regm_t retregs = mST0; 2393 codelem(cdb,e.EV.E2,&retregs,false); // FLD E2 2394 note87(e.EV.E2,0,0); 2395 getlvalue87(cdb,&cs,e.EV.E1,0); 2396 makesure87(cdb,e.EV.E2,0,0,0); 2397 2398 push87(cdb); 2399 switch (ty1) 2400 { 2401 case TYcdouble: cs.Iop = ESC(MFdouble,1); break; 2402 case TYcfloat: cs.Iop = ESC(MFfloat,1); break; 2403 case TYcldouble: cs.Iop = 0xDB; cs.Irm |= modregrm(0, 5, 0); break; 2404 default: 2405 assert(0); 2406 } 2407 cdb.gen(&cs); // FLD E1.re 2408 2409 cdb.gen2(0xD9, 0xF8); // FPREM 2410 code *cfm1 = cdb.last(); 2411 genjmpifC2(cdb, cfm1); // JC2 FM1 2412 cdb.genf2(0xD9, 0xC8 + 1); // FXCH ST(1) 2413 2414 push87(cdb); 2415 
cs.IEV1.Voffset += sz2; 2416 cdb.gen(&cs); // FLD E1.im 2417 2418 cdb.gen2(0xD9, 0xF8); // FPREM 2419 code *cfm2 = cdb.last(); 2420 genjmpifC2(cdb, cfm2); // JC2 FM2 2421 cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1) 2422 2423 pop87(); 2424 2425 if (*pretregs & (mST01 | mPSW)) 2426 { 2427 cs.Irm |= modregrm(0, 2, 0); 2428 cdb.gen(&cs); // FST mreal.im 2429 cs.IEV1.Voffset -= sz2; 2430 cdb.gen(&cs); // FST mreal.re 2431 retregs = mST01; 2432 } 2433 else 2434 { 2435 cs.Irm |= modregrm(0, 3, 0); 2436 cdb.gen(&cs); // FSTP mreal.im 2437 cs.IEV1.Voffset -= sz2; 2438 cdb.gen(&cs); // FSTP mreal.re 2439 pop87(); 2440 pop87(); 2441 retregs = 0; 2442 } 2443 freenode(e.EV.E1); 2444 genfwait(cdb); 2445 fixresult_complex87(cdb,e,retregs,pretregs); 2446 } 2447 2448 /********************************** 2449 * Perform +=, -=, *= and /= for the lvalue being complex. 2450 */ 2451 2452 private void opass_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2453 { 2454 regm_t retregs; 2455 regm_t idxregs; 2456 code cs; 2457 uint op; 2458 opcode_t op2; 2459 2460 tym_t ty1 = tybasic(e.EV.E1.Ety); 2461 uint sz2 = _tysize[ty1] / 2; 2462 switch (e.Eoper) 2463 { 2464 case OPpostinc: 2465 case OPaddass: op = 0 << 3; // FADD 2466 op2 = 0xC0; // FADDP ST(i),ST 2467 break; 2468 2469 case OPpostdec: 2470 case OPminass: op = 5 << 3; // FSUBR 2471 op2 = 0xE0; // FSUBRP ST(i),ST 2472 break; 2473 2474 case OPmulass: op = 1 << 3; // FMUL 2475 op2 = 0xC8; // FMULP ST(i),ST 2476 break; 2477 2478 case OPdivass: op = 7 << 3; // FDIVR 2479 op2 = 0xF0; // FDIVRP ST(i),ST 2480 break; 2481 2482 default: assert(0); 2483 } 2484 2485 if (!tycomplex(e.EV.E2.Ety) && 2486 (e.Eoper == OPmulass || e.Eoper == OPdivass)) 2487 { 2488 retregs = mST0; 2489 codelem(cdb,e.EV.E2, &retregs, false); 2490 note87(e.EV.E2, 0, 0); 2491 getlvalue87(cdb,&cs, e.EV.E1, 0); 2492 makesure87(cdb,e.EV.E2,0,0,0); 2493 push87(cdb); 2494 cdb.genf2(0xD9,0xC0); // FLD ST(0) 2495 goto L1; 2496 } 2497 else 2498 { 2499 loadComplex(cdb,e.EV.E2); 
2500 getlvalue87(cdb,&cs,e.EV.E1,0); 2501 makesure87(cdb,e.EV.E2,sz2,0,0); 2502 makesure87(cdb,e.EV.E2,0,1,0); 2503 } 2504 2505 switch (e.Eoper) 2506 { 2507 case OPpostinc: 2508 case OPaddass: 2509 case OPpostdec: 2510 case OPminass: 2511 L1: 2512 if (ty1 == TYcldouble) 2513 { 2514 push87(cdb); 2515 push87(cdb); 2516 cs.Iop = 0xDB; 2517 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2518 cdb.gen(&cs); // FLD e.EV.E1.re 2519 cs.IEV1.Voffset += sz2; 2520 cdb.gen(&cs); // FLD e.EV.E1.im 2521 cdb.genf2(0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST 2522 cdb.genf2(0xDE, op2 + 2); // FADDP/FSUBRP ST(2),ST 2523 pop87(); 2524 pop87(); 2525 if (tyimaginary(e.EV.E2.Ety)) 2526 { 2527 if (e.Eoper == OPmulass) 2528 { 2529 cdb.genf2(0xD9, 0xE0); // FCHS 2530 cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1) 2531 } 2532 else if (e.Eoper == OPdivass) 2533 { 2534 cdb.genf2(0xD9, 0xC8+1); // FXCH ST(1) 2535 cdb.genf2(0xD9, 0xE0); // FCHS 2536 } 2537 } 2538 L2: 2539 if (*pretregs & (mST01 | mPSW)) 2540 { 2541 push87(cdb); 2542 push87(cdb); 2543 cdb.genf2(0xD9,0xC1); // FLD ST(1) 2544 cdb.genf2(0xD9,0xC1); // FLD ST(1) 2545 retregs = mST01; 2546 } 2547 else 2548 retregs = 0; 2549 cs.Iop = 0xDB; 2550 cs.Irm |= modregrm(0,7,0); 2551 cdb.gen(&cs); // FSTP e.EV.E1.im 2552 cs.IEV1.Voffset -= sz2; 2553 cdb.gen(&cs); // FSTP e.EV.E1.re 2554 pop87(); 2555 pop87(); 2556 2557 } 2558 else 2559 { 2560 ubyte rmop = cast(ubyte)(cs.Irm | op); 2561 ubyte rmfst = cs.Irm | modregrm(0,2,0); 2562 ubyte rmfstp = cs.Irm | modregrm(0,3,0); 2563 ubyte iopfst = (ty1 == TYcfloat) ? 0xD9 : 0xDD; 2564 opcode_t iop = (ty1 == TYcfloat) ? 
0xD8 : 0xDC; 2565 2566 cs.Iop = iop; 2567 cs.Irm = rmop; 2568 cs.IEV1.Voffset += sz2; 2569 cdb.gen(&cs); // FSUBR mreal.im 2570 if (tyimaginary(e.EV.E2.Ety) && (e.Eoper == OPmulass || e.Eoper == OPdivass)) 2571 { 2572 if (e.Eoper == OPmulass) 2573 cdb.genf2(0xD9, 0xE0); // FCHS 2574 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2575 cs.IEV1.Voffset -= sz2; 2576 cdb.gen(&cs); // FMUL mreal.re 2577 if (e.Eoper == OPdivass) 2578 cdb.genf2(0xD9, 0xE0); // FCHS 2579 if (*pretregs & (mST01 | mPSW)) 2580 { 2581 cs.Iop = iopfst; 2582 cs.Irm = rmfst; 2583 cs.IEV1.Voffset += sz2; 2584 cdb.gen(&cs); // FST mreal.im 2585 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2586 cs.IEV1.Voffset -= sz2; 2587 cdb.gen(&cs); // FST mreal.re 2588 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2589 retregs = mST01; 2590 } 2591 else 2592 { 2593 cs.Iop = iopfst; 2594 cs.Irm = rmfstp; 2595 cs.IEV1.Voffset += sz2; 2596 cdb.gen(&cs); // FSTP mreal.im 2597 pop87(); 2598 cs.IEV1.Voffset -= sz2; 2599 cdb.gen(&cs); // FSTP mreal.re 2600 pop87(); 2601 retregs = 0; 2602 } 2603 goto L3; 2604 } 2605 2606 if (*pretregs & (mST01 | mPSW)) 2607 { 2608 cs.Iop = iopfst; 2609 cs.Irm = rmfst; 2610 cdb.gen(&cs); // FST mreal.im 2611 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2612 cs.Iop = iop; 2613 cs.Irm = rmop; 2614 cs.IEV1.Voffset -= sz2; 2615 cdb.gen(&cs); // FSUBR mreal.re 2616 cs.Iop = iopfst; 2617 cs.Irm = rmfst; 2618 cdb.gen(&cs); // FST mreal.re 2619 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 2620 retregs = mST01; 2621 } 2622 else 2623 { 2624 cs.Iop = iopfst; 2625 cs.Irm = rmfstp; 2626 cdb.gen(&cs); // FSTP mreal.im 2627 pop87(); 2628 cs.Iop = iop; 2629 cs.Irm = rmop; 2630 cs.IEV1.Voffset -= sz2; 2631 cdb.gen(&cs); // FSUBR mreal.re 2632 cs.Iop = iopfst; 2633 cs.Irm = rmfstp; 2634 cdb.gen(&cs); // FSTP mreal.re 2635 pop87(); 2636 retregs = 0; 2637 } 2638 } 2639 L3: 2640 freenode(e.EV.E1); 2641 genfwait(cdb); 2642 fixresult_complex87(cdb,e,retregs,pretregs); 2643 return; 2644 2645 case OPmulass: 2646 push87(cdb); 2647 
push87(cdb); 2648 if (ty1 == TYcldouble) 2649 { 2650 cs.Iop = 0xDB; 2651 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2652 cdb.gen(&cs); // FLD e.EV.E1.re 2653 cs.IEV1.Voffset += sz2; 2654 cdb.gen(&cs); // FLD e.EV.E1.im 2655 retregs = mST01; 2656 callclib(cdb, e, CLIB.cmul, &retregs, 0); 2657 goto L2; 2658 } 2659 else 2660 { 2661 cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD; 2662 cs.Irm |= modregrm(0, 0, 0); // FLD tbyte ptr ... 2663 cdb.gen(&cs); // FLD e.EV.E1.re 2664 cs.IEV1.Voffset += sz2; 2665 cdb.gen(&cs); // FLD e.EV.E1.im 2666 retregs = mST01; 2667 callclib(cdb, e, CLIB.cmul, &retregs, 0); 2668 if (*pretregs & (mST01 | mPSW)) 2669 { 2670 cs.Irm |= modregrm(0, 2, 0); 2671 cdb.gen(&cs); // FST mreal.im 2672 cs.IEV1.Voffset -= sz2; 2673 cdb.gen(&cs); // FST mreal.re 2674 retregs = mST01; 2675 } 2676 else 2677 { 2678 cs.Irm |= modregrm(0, 3, 0); 2679 cdb.gen(&cs); // FSTP mreal.im 2680 cs.IEV1.Voffset -= sz2; 2681 cdb.gen(&cs); // FSTP mreal.re 2682 pop87(); 2683 pop87(); 2684 retregs = 0; 2685 } 2686 goto L3; 2687 } 2688 2689 case OPdivass: 2690 push87(cdb); 2691 push87(cdb); 2692 idxregs = idxregm(&cs); // mask of index regs used 2693 if (ty1 == TYcldouble) 2694 { 2695 cs.Iop = 0xDB; 2696 cs.Irm |= modregrm(0, 5, 0); // FLD tbyte ptr ... 2697 cdb.gen(&cs); // FLD e.EV.E1.re 2698 cdb.genf2(0xD9,0xC8 + 2); // FXCH ST(2) 2699 cs.IEV1.Voffset += sz2; 2700 cdb.gen(&cs); // FLD e.EV.E1.im 2701 cdb.genf2(0xD9,0xC8 + 2); // FXCH ST(2) 2702 retregs = mST01; 2703 callclib(cdb, e, CLIB.cdiv, &retregs, idxregs); 2704 goto L2; 2705 } 2706 else 2707 { 2708 cs.Iop = (ty1 == TYcfloat) ? 0xD9 : 0xDD; 2709 cs.Irm |= modregrm(0, 0, 0); // FLD tbyte ptr ... 
2710 cdb.gen(&cs); // FLD e.EV.E1.re 2711 cdb.genf2(0xD9,0xC8 + 2); // FXCH ST(2) 2712 cs.IEV1.Voffset += sz2; 2713 cdb.gen(&cs); // FLD e.EV.E1.im 2714 cdb.genf2(0xD9,0xC8 + 2); // FXCH ST(2) 2715 retregs = mST01; 2716 callclib(cdb, e, CLIB.cdiv, &retregs, idxregs); 2717 if (*pretregs & (mST01 | mPSW)) 2718 { 2719 cs.Irm |= modregrm(0, 2, 0); 2720 cdb.gen(&cs); // FST mreal.im 2721 cs.IEV1.Voffset -= sz2; 2722 cdb.gen(&cs); // FST mreal.re 2723 retregs = mST01; 2724 } 2725 else 2726 { 2727 cs.Irm |= modregrm(0, 3, 0); 2728 cdb.gen(&cs); // FSTP mreal.im 2729 cs.IEV1.Voffset -= sz2; 2730 cdb.gen(&cs); // FSTP mreal.re 2731 pop87(); 2732 pop87(); 2733 retregs = 0; 2734 } 2735 goto L3; 2736 } 2737 2738 default: 2739 assert(0); 2740 } 2741 } 2742 2743 /************************** 2744 * OPnegass 2745 */ 2746 2747 void cdnegass87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2748 { 2749 regm_t retregs; 2750 uint op; 2751 2752 //printf("cdnegass87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 2753 elem *e1 = e.EV.E1; 2754 tym_t tyml = tybasic(e1.Ety); // type of lvalue 2755 int sz = _tysize[tyml]; 2756 2757 code cs; 2758 getlvalue87(cdb,&cs,e1,0); 2759 2760 /* If the EA is really an XMM register, modEA() will fail. 2761 * So disallow putting e1 into a register. 2762 * A better way would be to negate the XMM register in place. 
2763 */ 2764 if (e1.Eoper == OPvar) 2765 e1.EV.Vsym.Sflags &= ~GTregcand; 2766 2767 modEA(cdb,&cs); 2768 cs.Irm |= modregrm(0,6,0); 2769 cs.Iop = 0x80; 2770 if (tysize(TYldouble) > 10) 2771 { 2772 if (tyml == TYldouble || tyml == TYildouble) 2773 cs.IEV1.Voffset += 10 - 1; 2774 else if (tyml == TYcldouble) 2775 cs.IEV1.Voffset += tysize(TYldouble) + 10 - 1; 2776 else 2777 cs.IEV1.Voffset += sz - 1; 2778 } 2779 else 2780 cs.IEV1.Voffset += sz - 1; 2781 cs.IFL2 = FLconst; 2782 cs.IEV2.Vuns = 0x80; 2783 cdb.gen(&cs); // XOR 7[EA],0x80 2784 if (tycomplex(tyml)) 2785 { 2786 cs.IEV1.Voffset -= sz / 2; 2787 cdb.gen(&cs); // XOR 7[EA],0x80 2788 } 2789 2790 if (*pretregs) 2791 { 2792 switch (tyml) 2793 { 2794 case TYifloat: 2795 case TYfloat: cs.Iop = 0xD9; op = 0; break; 2796 case TYidouble: 2797 case TYdouble: 2798 case TYdouble_alias: cs.Iop = 0xDD; op = 0; break; 2799 case TYildouble: 2800 case TYldouble: cs.Iop = 0xDB; op = 5; break; 2801 default: 2802 assert(0); 2803 } 2804 NEWREG(cs.Irm,op); 2805 cs.IEV1.Voffset -= sz - 1; 2806 push87(cdb); 2807 cdb.gen(&cs); // FLD EA 2808 retregs = mST0; 2809 } 2810 else 2811 retregs = 0; 2812 2813 freenode(e1); 2814 fixresult87(cdb,e,retregs,pretregs); 2815 } 2816 2817 /************************ 2818 * Take care of OPpostinc and OPpostdec. 
2819 */ 2820 2821 void post87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2822 { 2823 uint op; 2824 opcode_t op1; 2825 reg_t reg; 2826 2827 //printf("post87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 2828 code cs; 2829 assert(*pretregs); 2830 getlvalue87(cdb,&cs,e.EV.E1,0); 2831 tym_t ty1 = tybasic(e.EV.E1.Ety); 2832 switch (ty1) 2833 { 2834 case TYdouble_alias: 2835 case TYidouble: 2836 case TYdouble: 2837 case TYcdouble: op1 = ESC(MFdouble,1); reg = 0; break; 2838 case TYifloat: 2839 case TYfloat: 2840 case TYcfloat: op1 = ESC(MFfloat,1); reg = 0; break; 2841 case TYildouble: 2842 case TYldouble: 2843 case TYcldouble: op1 = 0xDB; reg = 5; break; 2844 default: 2845 assert(0); 2846 } 2847 NEWREG(cs.Irm, reg); 2848 if (reg == 5) 2849 reg = 7; 2850 else 2851 reg = 3; 2852 cs.Iop = op1; 2853 push87(cdb); 2854 cdb.gen(&cs); // FLD e.EV.E1 2855 if (tycomplex(ty1)) 2856 { 2857 uint sz = _tysize[ty1] / 2; 2858 2859 push87(cdb); 2860 cs.IEV1.Voffset += sz; 2861 cdb.gen(&cs); // FLD e.EV.E1 2862 regm_t retregs = mST0; // note kludge to only load real part 2863 codelem(cdb,e.EV.E2,&retregs,false); // load rvalue 2864 cdb.genf2(0xD8, // FADD/FSUBR ST,ST2 2865 (e.Eoper == OPpostinc) ? 0xC0 + 2 : 0xE8 + 2); 2866 NEWREG(cs.Irm,reg); 2867 pop87(); 2868 cs.IEV1.Voffset -= sz; 2869 cdb.gen(&cs); // FSTP e.EV.E1 2870 genfwait(cdb); 2871 freenode(e.EV.E1); 2872 fixresult_complex87(cdb, e, mST01, pretregs); 2873 return; 2874 } 2875 2876 if (*pretregs & (mST0 | ALLREGS | mBP | XMMREGS)) 2877 { // Want the result in a register 2878 push87(cdb); 2879 cdb.genf2(0xD9,0xC0); // FLD ST0 2880 } 2881 if (*pretregs & mPSW) // if result in flags 2882 genftst(cdb,e,0); // FTST ST0 2883 regm_t retregs = mST0; 2884 codelem(cdb,e.EV.E2,&retregs,false); // load rvalue 2885 pop87(); 2886 op = (e.Eoper == OPpostinc) ? 
modregrm(3,0,1) : modregrm(3,5,1); 2887 cdb.genf2(0xDE,op); // FADDP/FSUBRP ST1 2888 NEWREG(cs.Irm,reg); 2889 pop87(); 2890 cdb.gen(&cs); // FSTP e.EV.E1 2891 genfwait(cdb); 2892 freenode(e.EV.E1); 2893 fixresult87(cdb,e,mPSW | mST0,pretregs); 2894 } 2895 2896 /************************ 2897 * Do the following opcodes: 2898 * OPd_u64 2899 * OPld_u64 2900 */ 2901 void cdd_u64(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 2902 { 2903 assert(I32 || I64); 2904 assert(*pretregs); 2905 if (I32) 2906 cdd_u64_I32(cdb, e, pretregs); 2907 else 2908 cdd_u64_I64(cdb, e, pretregs); 2909 } 2910 2911 private void cdd_u64_I32(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 2912 { 2913 /* Generate: 2914 mov EDX,0x8000_0000 2915 mov floatreg+0,0 2916 mov floatreg+4,EDX 2917 mov floatreg+8,0x0FBF403e // (roundTo0<<16) | adjust 2918 fld real ptr floatreg // adjust (= 1/real.epsilon) 2919 fcomp 2920 fstsw AX 2921 fstcw floatreg+12 2922 fldcw floatreg+10 // roundTo0 2923 test AH,1 2924 jz L1 // jae L1 2925 2926 fld real ptr floatreg // adjust 2927 fsubp ST(1), ST 2928 fistp floatreg 2929 mov EAX,floatreg 2930 add EDX,floatreg+4 2931 fldcw floatreg+12 2932 jmp L2 2933 2934 L1: 2935 fistp floatreg 2936 mov EAX,floatreg 2937 mov EDX,floatreg+4 2938 fldcw floatreg+12 2939 L2: 2940 */ 2941 regm_t retregs = mST0; 2942 codelem(cdb,e.EV.E1, &retregs, false); 2943 tym_t tym = e.Ety; 2944 retregs = *pretregs; 2945 if (!retregs) 2946 retregs = ALLREGS; 2947 reg_t reg, reg2; 2948 allocreg(cdb,&retregs,®,tym); 2949 reg = findreglsw(retregs); 2950 reg2 = findregmsw(retregs); 2951 movregconst(cdb,reg2,0x80000000,0); 2952 getregs(cdb,mask(reg2) | mAX); 2953 2954 cdb.genfltreg(0xC7,0,0); 2955 code *cf1 = cdb.last(); 2956 cf1.IFL2 = FLconst; 2957 cf1.IEV2.Vint = 0; // MOV floatreg+0,0 2958 cdb.genfltreg(STO,reg2,4); // MOV floatreg+4,EDX 2959 cdb.genfltreg(0xC7,0,8); 2960 code *cf3 = cdb.last(); 2961 cf3.IFL2 = FLconst; 2962 cf3.IEV2.Vint = 0xFBF403E; // MOV floatreg+8,(roundTo0<<16)|adjust 2963 2964 
push87(cdb); 2965 cdb.genfltreg(0xDB,5,0); // FLD real ptr floatreg 2966 cdb.gen2(0xD8,0xD9); // FCOMP 2967 pop87(); 2968 cdb.gen2(0xDF,0xE0); // FSTSW AX 2969 cdb.genfltreg(0xD9,7,12); // FSTCW floatreg+12 2970 cdb.genfltreg(0xD9,5,10); // FLDCW floatreg+10 2971 cdb.genc2(0xF6,modregrm(3,0,4),1); // TEST AH,1 2972 code *cnop1 = gennop(null); 2973 genjmp(cdb,JE,FLcode,cast(block *)cnop1); // JZ L1 2974 2975 cdb.genfltreg(0xDB,5,0); // FLD real ptr floatreg 2976 cdb.genf2(0xDE,0xE8+1); // FSUBP ST(1),ST 2977 cdb.genfltreg(0xDF,7,0); // FISTP dword ptr floatreg 2978 cdb.genfltreg(LOD,reg,0); // MOV reg,floatreg 2979 cdb.genfltreg(0x03,reg2,4); // ADD reg,floatreg+4 2980 cdb.genfltreg(0xD9,5,12); // FLDCW floatreg+12 2981 code *cnop2 = gennop(null); 2982 genjmp(cdb,JMP,FLcode,cast(block *)cnop2); // JMP L2 2983 2984 cdb.append(cnop1); 2985 cdb.genfltreg(0xDF,7,0); // FISTP dword ptr floatreg 2986 cdb.genfltreg(LOD,reg,0); // MOV reg,floatreg 2987 cdb.genfltreg(LOD,reg2,4); // MOV reg,floatreg+4 2988 cdb.genfltreg(0xD9,5,12); // FLDCW floatreg+12 2989 cdb.append(cnop2); 2990 2991 pop87(); 2992 fixresult(cdb,e,retregs,pretregs); 2993 } 2994 2995 private void cdd_u64_I64(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 2996 { 2997 /* Generate: 2998 mov EDX,0x8000_0000 2999 mov floatreg+0,0 3000 mov floatreg+4,EDX 3001 mov floatreg+8,0x0FBF403e // (roundTo0<<16) | adjust 3002 fld real ptr floatreg // adjust 3003 fcomp 3004 fstsw AX 3005 fstcw floatreg+12 3006 fldcw floatreg+10 // roundTo0 3007 test AH,1 3008 jz L1 // jae L1 3009 3010 fld real ptr floatreg // adjust 3011 fsubp ST(1), ST 3012 fistp floatreg 3013 mov RAX,floatreg 3014 shl RDX,32 3015 add RAX,RDX 3016 fldcw floatreg+12 3017 jmp L2 3018 3019 L1: 3020 fistp floatreg 3021 mov RAX,floatreg 3022 fldcw floatreg+12 3023 L2: 3024 */ 3025 regm_t retregs = mST0; 3026 codelem(cdb,e.EV.E1, &retregs, false); 3027 tym_t tym = e.Ety; 3028 retregs = *pretregs; 3029 if (!retregs) 3030 retregs = ALLREGS; 3031 reg_t reg; 3032 
allocreg(cdb,&retregs,®,tym); 3033 regm_t regm2 = ALLREGS & ~retregs & ~mAX; 3034 reg_t reg2; 3035 allocreg(cdb,®m2,®2,tym); 3036 movregconst(cdb,reg2,0x80000000,0); 3037 getregs(cdb,mask(reg2) | mAX); 3038 3039 cdb.genfltreg(0xC7,0,0); 3040 code *cf1 = cdb.last(); 3041 cf1.IFL2 = FLconst; 3042 cf1.IEV2.Vint = 0; // MOV floatreg+0,0 3043 cdb.genfltreg(STO,reg2,4); // MOV floatreg+4,EDX 3044 cdb.genfltreg(0xC7,0,8); 3045 code *cf3 = cdb.last(); 3046 cf3.IFL2 = FLconst; 3047 cf3.IEV2.Vint = 0xFBF403E; // MOV floatreg+8,(roundTo0<<16)|adjust 3048 3049 push87(cdb); 3050 cdb.genfltreg(0xDB,5,0); // FLD real ptr floatreg 3051 cdb.gen2(0xD8,0xD9); // FCOMP 3052 pop87(); 3053 cdb.gen2(0xDF,0xE0); // FSTSW AX 3054 cdb.genfltreg(0xD9,7,12); // FSTCW floatreg+12 3055 cdb.genfltreg(0xD9,5,10); // FLDCW floatreg+10 3056 cdb.genc2(0xF6,modregrm(3,0,4),1); // TEST AH,1 3057 code *cnop1 = gennop(null); 3058 genjmp(cdb,JE,FLcode,cast(block *)cnop1); // JZ L1 3059 3060 cdb.genfltreg(0xDB,5,0); // FLD real ptr floatreg 3061 cdb.genf2(0xDE,0xE8+1); // FSUBP ST(1),ST 3062 cdb.genfltreg(0xDF,7,0); // FISTP dword ptr floatreg 3063 cdb.genfltreg(LOD,reg,0); // MOV reg,floatreg 3064 code_orrex(cdb.last(), REX_W); 3065 cdb.genc2(0xC1,(REX_W << 16) | modregrmx(3,4,reg2),32); // SHL reg2,32 3066 cdb.gen2(0x03,(REX_W << 16) | modregxrmx(3,reg,reg2)); // ADD reg,reg2 3067 cdb.genfltreg(0xD9,5,12); // FLDCW floatreg+12 3068 code *cnop2 = gennop(null); 3069 genjmp(cdb,JMP,FLcode,cast(block *)cnop2); // JMP L2 3070 3071 cdb.append(cnop1); 3072 cdb.genfltreg(0xDF,7,0); // FISTP dword ptr floatreg 3073 cdb.genfltreg(LOD,reg,0); // MOV reg,floatreg 3074 code_orrex(cdb.last(), REX_W); 3075 cdb.genfltreg(0xD9,5,12); // FLDCW floatreg+12 3076 cdb.append(cnop2); 3077 3078 pop87(); 3079 fixresult(cdb,e,retregs,pretregs); 3080 } 3081 3082 /************************ 3083 * Do the following opcodes: 3084 * OPd_u32 3085 */ 3086 void cdd_u32(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 3087 { 3088 assert(I32 
|| I64); 3089 3090 /* Generate: 3091 mov floatreg+8,0x0FBF0000 // (roundTo0<<16) 3092 fstcw floatreg+12 3093 fldcw floatreg+10 // roundTo0 3094 fistp floatreg 3095 fldcw floatreg+12 3096 mov EAX,floatreg 3097 */ 3098 regm_t retregs = mST0; 3099 codelem(cdb,e.EV.E1, &retregs, false); 3100 tym_t tym = e.Ety; 3101 retregs = *pretregs & ALLREGS; 3102 if (!retregs) 3103 retregs = ALLREGS; 3104 reg_t reg; 3105 allocreg(cdb,&retregs,®,tym); 3106 3107 cdb.genfltreg(0xC7,0,8); 3108 code *cf3 = cdb.last(); 3109 cf3.IFL2 = FLconst; 3110 cf3.IEV2.Vint = 0x0FBF0000; // MOV floatreg+8,(roundTo0<<16) 3111 3112 cdb.genfltreg(0xD9,7,12); // FSTCW floatreg+12 3113 cdb.genfltreg(0xD9,5,10); // FLDCW floatreg+10 3114 3115 cdb.genfltreg(0xDF,7,0); // FISTP dword ptr floatreg 3116 cdb.genfltreg(0xD9,5,12); // FLDCW floatreg+12 3117 cdb.genfltreg(LOD,reg,0); // MOV reg,floatreg 3118 3119 pop87(); 3120 fixresult(cdb,e,retregs,pretregs); 3121 } 3122 3123 /************************ 3124 * Do the following opcodes: 3125 * OPd_s16 3126 * OPd_s32 3127 * OPd_u16 3128 * OPd_s64 3129 */ 3130 3131 void cnvt87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3132 { 3133 regm_t retregs; 3134 uint mf,rf; 3135 reg_t reg; 3136 int clib; 3137 3138 //printf("cnvt87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 3139 assert(*pretregs); 3140 tym_t tym = e.Ety; 3141 int sz = tysize(tym); 3142 int szoff = sz; 3143 3144 switch (e.Eoper) 3145 { 3146 case OPd_s16: 3147 clib = CLIB.dblint87; 3148 mf = ESC(MFword,1); 3149 rf = 3; 3150 break; 3151 3152 case OPd_u16: 3153 szoff = 4; 3154 goto case OPd_s32; 3155 3156 case OPd_s32: 3157 clib = CLIB.dbllng87; 3158 mf = ESC(MFlong,1); 3159 rf = 3; 3160 break; 3161 3162 case OPd_s64: 3163 clib = CLIB.dblllng; 3164 mf = 0xDF; 3165 rf = 7; 3166 break; 3167 3168 default: 3169 assert(0); 3170 } 3171 3172 if (I16) // C may change the default control word 3173 { 3174 if (clib == CLIB.dblllng) 3175 { retregs = I32 ? 
DOUBLEREGS_32 : DOUBLEREGS_16; 3176 codelem(cdb,e.EV.E1,&retregs,false); 3177 callclib(cdb,e,clib,pretregs,0); 3178 } 3179 else 3180 { retregs = mST0; //I32 ? DOUBLEREGS_32 : DOUBLEREGS_16; 3181 codelem(cdb,e.EV.E1,&retregs,false); 3182 callclib(cdb,e,clib,pretregs,0); 3183 pop87(); 3184 } 3185 } 3186 else if (1) 3187 { // Generate: 3188 // sub ESP,12 3189 // fstcw 8[ESP] 3190 // fldcw roundto0 3191 // fistp long64 ptr [ESP] 3192 // fldcw 8[ESP] 3193 // pop lsw 3194 // pop msw 3195 // add ESP,4 3196 3197 uint szpush = szoff + 2; 3198 if (config.flags3 & CFG3pic) 3199 szpush += 2; 3200 szpush = (szpush + REGSIZE - 1) & ~(REGSIZE - 1); 3201 3202 retregs = mST0; 3203 codelem(cdb,e.EV.E1,&retregs,false); 3204 3205 if (szpush == REGSIZE) 3206 cdb.gen1(0x50 + AX); // PUSH EAX 3207 else 3208 cod3_stackadj(cdb, szpush); 3209 genfwait(cdb); 3210 cdb.genc1(0xD9,modregrm(2,7,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FSTCW szoff[ESP] 3211 3212 genfwait(cdb); 3213 3214 if (config.flags3 & CFG3pic) 3215 { 3216 cdb.genc(0xC7,modregrm(2,0,4) + 256*modregrm(0,4,SP),FLconst,szoff+2,FLconst,CW_roundto0); // MOV szoff+2[ESP], CW_roundto0 3217 code_orflag(cdb.last(), CFopsize); 3218 cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff+2); // FLDCW szoff+2[ESP] 3219 } 3220 else 3221 genrnd(cdb, CW_roundto0); // FLDCW roundto0 3222 3223 pop87(); 3224 3225 genfwait(cdb); 3226 cdb.gen2sib(mf,modregrm(0,rf,4),modregrm(0,4,SP)); // FISTP [ESP] 3227 3228 retregs = *pretregs & (ALLREGS | mBP); 3229 if (!retregs) 3230 retregs = ALLREGS; 3231 allocreg(cdb,&retregs,®,tym); 3232 3233 genfwait(cdb); // FWAIT 3234 cdb.genc1(0xD9,modregrm(2,5,4) + 256*modregrm(0,4,SP),FLconst,szoff); // FLDCW szoff[ESP] 3235 3236 if (szoff > REGSIZE) 3237 { szpush -= REGSIZE; 3238 genpop(cdb,findreglsw(retregs)); // POP lsw 3239 } 3240 szpush -= REGSIZE; 3241 genpop(cdb,reg); // POP reg 3242 3243 if (szpush) 3244 cod3_stackadj(cdb, -szpush); 3245 fixresult(cdb,e,retregs,pretregs); 3246 } 3247 else 
3248 { 3249 // This is incorrect. For -inf and nan, the 8087 returns the largest 3250 // negative int (0x80000....). For -inf, 0x7FFFF... should be returned, 3251 // and for nan, 0 should be returned. 3252 retregs = mST0; 3253 codelem(cdb,e.EV.E1,&retregs,false); 3254 3255 genfwait(cdb); 3256 genrnd(cdb, CW_roundto0); // FLDCW roundto0 3257 3258 pop87(); 3259 cdb.genfltreg(mf,rf,0); // FISTP floatreg 3260 retregs = *pretregs & (ALLREGS | mBP); 3261 if (!retregs) 3262 retregs = ALLREGS; 3263 allocreg(cdb,&retregs,®,tym); 3264 3265 genfwait(cdb); 3266 3267 if (sz > REGSIZE) 3268 { 3269 cdb.genfltreg(LOD,reg,REGSIZE); // MOV reg,floatreg + REGSIZE 3270 // MOV lsreg,floatreg 3271 cdb.genfltreg(LOD,findreglsw(retregs),0); 3272 } 3273 else 3274 cdb.genfltreg(LOD,reg,0); // MOV reg,floatreg 3275 genrnd(cdb, CW_roundtonearest); // FLDCW roundtonearest 3276 fixresult(cdb,e,retregs,pretregs); 3277 } 3278 } 3279 3280 /************************ 3281 * Do OPrndtol. 3282 */ 3283 3284 void cdrndtol(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3285 { 3286 if (*pretregs == 0) 3287 { 3288 codelem(cdb,e.EV.E1,pretregs,false); 3289 return; 3290 } 3291 regm_t retregs = mST0; 3292 codelem(cdb,e.EV.E1,&retregs,false); 3293 3294 ubyte op1,op2; 3295 tym_t tym = e.Ety; 3296 uint sz = tysize(tym); 3297 switch (sz) 3298 { case 2: 3299 op1 = 0xDF; 3300 op2 = 3; 3301 break; 3302 case 4: 3303 op1 = 0xDB; 3304 op2 = 3; 3305 break; 3306 case 8: 3307 op1 = 0xDF; 3308 op2 = 7; 3309 break; 3310 default: 3311 assert(0); 3312 } 3313 3314 pop87(); 3315 cdb.genfltreg(op1,op2,0); // FISTP floatreg 3316 retregs = *pretregs & (ALLREGS | mBP); 3317 if (!retregs) 3318 retregs = ALLREGS; 3319 reg_t reg; 3320 allocreg(cdb,&retregs,®,tym); 3321 genfwait(cdb); // FWAIT 3322 if (tysize(tym) > REGSIZE) 3323 { 3324 cdb.genfltreg(LOD,reg,REGSIZE); // MOV reg,floatreg + REGSIZE 3325 // MOV lsreg,floatreg 3326 cdb.genfltreg(LOD,findreglsw(retregs),0); 3327 } 3328 else 3329 { 3330 cdb.genfltreg(LOD,reg,0); // MOV 
reg,floatreg 3331 if (tysize(tym) == 8 && I64) 3332 code_orrex(cdb.last(), REX_W); 3333 } 3334 fixresult(cdb,e,retregs,pretregs); 3335 } 3336 3337 /************************* 3338 * Do OPscale, OPyl2x, OPyl2xp1. 3339 */ 3340 3341 void cdscale(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3342 { 3343 assert(*pretregs != 0); 3344 3345 regm_t retregs = mST0; 3346 codelem(cdb,e.EV.E1,&retregs,false); 3347 note87(e.EV.E1,0,0); 3348 codelem(cdb,e.EV.E2,&retregs,false); 3349 makesure87(cdb,e.EV.E1,0,1,0); // now have x,y on stack; need y,x 3350 switch (e.Eoper) 3351 { 3352 case OPscale: 3353 cdb.genf2(0xD9,0xFD); // FSCALE 3354 cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1) 3355 break; 3356 3357 case OPyl2x: 3358 cdb.genf2(0xD9,0xF1); // FYL2X 3359 break; 3360 3361 case OPyl2xp1: 3362 cdb.genf2(0xD9,0xF9); // FYL2XP1 3363 break; 3364 3365 default: 3366 assert(0); 3367 } 3368 pop87(); 3369 fixresult87(cdb,e,mST0,pretregs); 3370 } 3371 3372 3373 /********************************** 3374 * Unary -, absolute value, square root, sine, cosine 3375 */ 3376 3377 void neg87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3378 { 3379 //printf("neg87()\n"); 3380 3381 assert(*pretregs); 3382 opcode_t op; 3383 switch (e.Eoper) 3384 { case OPneg: op = 0xE0; break; 3385 case OPabs: op = 0xE1; break; 3386 case OPsqrt: op = 0xFA; break; 3387 case OPsin: op = 0xFE; break; 3388 case OPcos: op = 0xFF; break; 3389 case OPrint: op = 0xFC; break; // FRNDINT 3390 default: 3391 assert(0); 3392 } 3393 regm_t retregs = mST0; 3394 codelem(cdb,e.EV.E1,&retregs,false); 3395 cdb.genf2(0xD9,op); // FCHS/FABS/FSQRT/FSIN/FCOS/FRNDINT 3396 fixresult87(cdb,e,mST0,pretregs); 3397 } 3398 3399 /********************************** 3400 * Unary - for complex operands 3401 */ 3402 3403 void neg_complex87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3404 { 3405 assert(e.Eoper == OPneg); 3406 regm_t retregs = mST01; 3407 codelem(cdb,e.EV.E1,&retregs,false); 3408 cdb.genf2(0xD9,0xE0); // FCHS 3409 cdb.genf2(0xD9,0xC8 + 1); // 
FXCH ST(1) 3410 cdb.genf2(0xD9,0xE0); // FCHS 3411 cdb.genf2(0xD9,0xC8 + 1); // FXCH ST(1) 3412 fixresult_complex87(cdb,e,mST01,pretregs); 3413 } 3414 3415 /********************************* 3416 */ 3417 3418 void cdind87(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3419 { 3420 //printf("cdind87(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); 3421 code cs; 3422 3423 getlvalue87(cdb,&cs,e,0); // get addressing mode 3424 if (*pretregs) 3425 { 3426 switch (tybasic(e.Ety)) 3427 { case TYfloat: 3428 case TYifloat: 3429 cs.Iop = 0xD9; 3430 break; 3431 3432 case TYidouble: 3433 case TYdouble: 3434 case TYdouble_alias: 3435 cs.Iop = 0xDD; 3436 break; 3437 3438 case TYildouble: 3439 case TYldouble: 3440 cs.Iop = 0xDB; 3441 cs.Irm |= modregrm(0,5,0); 3442 break; 3443 3444 default: 3445 assert(0); 3446 } 3447 push87(cdb); 3448 cdb.gen(&cs); // FLD EA 3449 fixresult87(cdb,e,mST0,pretregs); 3450 } 3451 } 3452 3453 /************************************ 3454 * Reset statics for another .obj file. 3455 */ 3456 3457 void cg87_reset() 3458 { 3459 memset(&oldd,0,oldd.sizeof); 3460 } 3461 3462 3463 /***************************************** 3464 * Initialize control word constants. 3465 */ 3466 3467 private void genrnd(ref CodeBuilder cdb, short cw) 3468 { 3469 if (config.flags3 & CFG3pic) 3470 { 3471 cdb.genfltreg(0xC7, 0, 0); // MOV floatreg, cw 3472 code *c1 = cdb.last(); 3473 c1.IFL2 = FLconst; 3474 c1.IEV2.Vuns = cw; 3475 3476 cdb.genfltreg(0xD9, 5, 0); // FLDCW floatreg 3477 } 3478 else 3479 { 3480 if (!oldd.round) // if not initialized 3481 { 3482 short cwi; 3483 3484 oldd.round = 1; 3485 3486 cwi = CW_roundto0; // round to 0 3487 oldd.roundto0 = out_readonly_sym(TYshort,&cwi,2); 3488 cwi = CW_roundtonearest; // round to nearest 3489 oldd.roundtonearest = out_readonly_sym(TYshort,&cwi,2); 3490 } 3491 Symbol *rnddir = (cw == CW_roundto0) ? 
oldd.roundto0 : oldd.roundtonearest; 3492 code cs; 3493 cs.Iop = 0xD9; 3494 cs.Iflags = CFoff; 3495 cs.Irex = 0; 3496 cs.IEV1.Vsym = rnddir; 3497 cs.IFL1 = rnddir.Sfl; 3498 cs.IEV1.Voffset = 0; 3499 cs.Irm = modregrm(0,5,BPRM); 3500 cdb.gen(&cs); 3501 } 3502 } 3503 3504 /************************* Complex Numbers *********************/ 3505 3506 /*************************** 3507 * Set the PSW based on the state of ST01. 3508 * Input: 3509 * pop if stack should be popped after test 3510 */ 3511 3512 private void genctst(ref CodeBuilder cdb,elem *e,int pop) 3513 { 3514 assert(pop == 0 || pop == 1); 3515 3516 // Generate: 3517 // if (NOSAHF && pop) 3518 // FLDZ 3519 // FUCOMIP 3520 // JNE L1 3521 // JP L1 // if NAN 3522 // FLDZ 3523 // FUCOMIP ST(2) 3524 // L1: 3525 // if (pop) 3526 // FPOP 3527 // FPOP 3528 // if (pop) 3529 // FLDZ 3530 // FUCOMPP 3531 // FSTSW AX 3532 // SAHF 3533 // FLDZ 3534 // FUCOMPP 3535 // JNE L1 3536 // JP L1 // if NAN 3537 // FSTSW AX 3538 // SAHF 3539 // L1: 3540 // else 3541 // FLDZ 3542 // FUCOM 3543 // FSTSW AX 3544 // SAHF 3545 // FUCOMP ST(2) 3546 // JNE L1 3547 // JP L1 // if NAN 3548 // FSTSW AX 3549 // SAHF 3550 // L1: 3551 // FUCOMP doesn't raise exceptions on QNANs, unlike FTST 3552 3553 CodeBuilder cdbnop; 3554 cdbnop.ctor(); 3555 cdbnop.gennop(); 3556 code *cnop = cdbnop.peek(); 3557 push87(cdb); 3558 cdb.gen2(0xD9,0xEE); // FLDZ 3559 if (NOSAHF) 3560 { 3561 cdb.gen2(0xDF,0xE9); // FUCOMIP 3562 pop87(); 3563 genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE L1 3564 genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP L1 3565 cdb.gen2(0xD9,0xEE); // FLDZ 3566 cdb.gen2(0xDF,0xEA); // FUCOMIP ST(2) 3567 if (pop) 3568 { 3569 cdbnop.genf2(0xDD,modregrm(3,3,0)); // FPOP 3570 cdbnop.genf2(0xDD,modregrm(3,3,0)); // FPOP 3571 pop87(); 3572 pop87(); 3573 } 3574 } 3575 else if (pop) 3576 { 3577 cdb.gen2(0xDA,0xE9); // FUCOMPP 3578 pop87(); 3579 pop87(); 3580 cg87_87topsw(cdb); // put 8087 flags in CPU flags 3581 cdb.gen2(0xD9,0xEE); // FLDZ 
3582 cdb.gen2(0xDA,0xE9); // FUCOMPP 3583 pop87(); 3584 genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE L1 3585 genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP L1 3586 cg87_87topsw(cdb); // put 8087 flags in CPU flags 3587 } 3588 else 3589 { 3590 cdb.gen2(0xDD,0xE1); // FUCOM 3591 cg87_87topsw(cdb); // put 8087 flags in CPU flags 3592 cdb.gen2(0xDD,0xEA); // FUCOMP ST(2) 3593 pop87(); 3594 genjmp(cdb,JNE,FLcode,cast(block *) cnop); // JNE L1 3595 genjmp(cdb,JP, FLcode,cast(block *) cnop); // JP L1 3596 cg87_87topsw(cdb); // put 8087 flags in CPU flags 3597 } 3598 cdb.append(cdbnop); 3599 } 3600 3601 /****************************** 3602 * Given the result of an expression is in retregs, 3603 * generate necessary code to return result in *pretregs. 3604 */ 3605 3606 3607 void fixresult_complex87(ref CodeBuilder cdb,elem *e,regm_t retregs,regm_t *pretregs) 3608 { 3609 static if (0) 3610 { 3611 printf("fixresult_complex87(e = %p, retregs = %s, *pretregs = %s)\n", 3612 e,regm_str(retregs),regm_str(*pretregs)); 3613 } 3614 3615 assert(!*pretregs || retregs); 3616 tym_t tym = tybasic(e.Ety); 3617 uint sz = _tysize[tym]; 3618 3619 if (*pretregs == 0 && retregs == mST01) 3620 { 3621 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 3622 pop87(); 3623 cdb.genf2(0xDD,modregrm(3,3,0)); // FPOP 3624 pop87(); 3625 } 3626 else if (tym == TYllong) 3627 { 3628 // passing cfloat through register for I64 3629 assert(retregs & mST01, "this float expression is not implemented"); 3630 pop87(); 3631 cdb.genfltreg(ESC(MFfloat,1),BX,4); // FSTP floatreg 3632 pop87(); 3633 cdb.genfltreg(ESC(MFfloat,1),BX,0); // FSTP floatreg+4 3634 genfwait(cdb); 3635 const reg = findreg(*pretregs); 3636 getregs(cdb,reg); 3637 cdb.genfltreg(LOD, reg, 0); // MOV ECX,floatreg 3638 code_orrex(cdb.last(), REX_W); // extend to RCX 3639 } 3640 else if (tym == TYcfloat && *pretregs & (mAX|mDX) && retregs & mST01) 3641 { 3642 if (*pretregs & mPSW && !(retregs & mPSW)) 3643 genctst(cdb,e,0); // FTST 3644 pop87(); 3645 
cdb.genfltreg(ESC(MFfloat,1),3,0); // FSTP floatreg 3646 genfwait(cdb); 3647 getregs(cdb,mDX|mAX); 3648 cdb.genfltreg(LOD, DX, 0); // MOV EDX,floatreg 3649 3650 pop87(); 3651 cdb.genfltreg(ESC(MFfloat,1),3,0); // FSTP floatreg 3652 genfwait(cdb); 3653 cdb.genfltreg(LOD, AX, 0); // MOV EAX,floatreg 3654 } 3655 else if (tym == TYcfloat && retregs & (mAX|mDX) && *pretregs & mST01) 3656 { 3657 push87(cdb); 3658 cdb.genfltreg(STO, AX, 0); // MOV floatreg, EAX 3659 cdb.genfltreg(0xD9, 0, 0); // FLD float ptr floatreg 3660 3661 push87(cdb); 3662 cdb.genfltreg(STO, DX, 0); // MOV floatreg, EDX 3663 cdb.genfltreg(0xD9, 0, 0); // FLD float ptr floatreg 3664 3665 if (*pretregs & mPSW) 3666 genctst(cdb,e,0); // FTST 3667 } 3668 else if ((tym == TYcfloat || tym == TYcdouble) && 3669 *pretregs & (mXMM0|mXMM1) && retregs & mST01) 3670 { 3671 tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble; 3672 uint xop = xmmload(tyf); 3673 uint mf = tyf == TYfloat ? MFfloat : MFdouble; 3674 if (*pretregs & mPSW && !(retregs & mPSW)) 3675 genctst(cdb,e,0); // FTST 3676 pop87(); 3677 cdb.genfltreg(ESC(mf,1),3,0); // FSTP floatreg 3678 genfwait(cdb); 3679 getregs(cdb,mXMM0|mXMM1); 3680 cdb.genxmmreg(xop,XMM1,0,tyf); 3681 3682 pop87(); 3683 cdb.genfltreg(ESC(mf,1),3,0); // FSTP floatreg 3684 genfwait(cdb); 3685 cdb.genxmmreg(xop, XMM0, 0, tyf); // MOVD XMM0,floatreg 3686 } 3687 else if ((tym == TYcfloat || tym == TYcdouble) && 3688 retregs & (mXMM0|mXMM1) && *pretregs & mST01) 3689 { 3690 tym_t tyf = tym == TYcfloat ? TYfloat : TYdouble; 3691 uint xop = xmmstore(tyf); 3692 uint fop = tym == TYcfloat ? 
0xD9 : 0xDD; 3693 push87(cdb); 3694 cdb.genfltreg(xop, XMM0-XMM0, 0); // STOS(SD) floatreg, XMM0 3695 checkSetVex(cdb.last(),tyf); 3696 cdb.genfltreg(fop, 0, 0); // FLD double ptr floatreg 3697 3698 push87(cdb); 3699 cdb.genxmmreg(xop, XMM1, 0, tyf); // MOV floatreg, XMM1 3700 cdb.genfltreg(fop, 0, 0); // FLD double ptr floatreg 3701 3702 if (*pretregs & mPSW) 3703 genctst(cdb,e,0); // FTST 3704 } 3705 else 3706 { if (*pretregs & mPSW) 3707 { if (!(retregs & mPSW)) 3708 { assert(retregs & mST01); 3709 genctst(cdb,e,!(*pretregs & mST01)); // FTST 3710 } 3711 } 3712 assert(!(*pretregs & mST01) || (retregs & mST01)); 3713 } 3714 if (*pretregs & mST01) 3715 { note87(e,0,1); 3716 note87(e,sz/2,0); 3717 } 3718 } 3719 3720 /***************************************** 3721 * Operators OPc_r and OPc_i 3722 */ 3723 3724 void cdconvt87(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 3725 { 3726 regm_t retregs = mST01; 3727 codelem(cdb,e.EV.E1, &retregs, false); 3728 switch (e.Eoper) 3729 { 3730 case OPc_r: 3731 cdb.genf2(0xDD,0xD8 + 0); // FPOP 3732 pop87(); 3733 break; 3734 3735 case OPc_i: 3736 cdb.genf2(0xDD,0xD8 + 1); // FSTP ST(1) 3737 pop87(); 3738 break; 3739 3740 default: 3741 assert(0); 3742 } 3743 retregs = mST0; 3744 fixresult87(cdb, e, retregs, pretregs); 3745 } 3746 3747 /************************************** 3748 * Load complex operand into ST01 or flags or both. 
3749 */ 3750 3751 void cload87(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 3752 { 3753 //printf("e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 3754 //elem_print(e); 3755 assert(!I16); 3756 debug 3757 if (I32) 3758 { 3759 assert(config.inline8087); 3760 elem_debug(e); 3761 assert(*pretregs & (mST01 | mPSW)); 3762 assert(!(*pretregs & ~(mST01 | mPSW))); 3763 } 3764 3765 tym_t ty = tybasic(e.Ety); 3766 code cs = void; 3767 uint mf; 3768 uint sz; 3769 ubyte ldop; 3770 regm_t retregs; 3771 int i; 3772 3773 //printf("cload87(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 3774 sz = _tysize[ty] / 2; 3775 memset(&cs, 0, cs.sizeof); 3776 if (ADDFWAIT()) 3777 cs.Iflags = CFwait; 3778 switch (ty) 3779 { 3780 case TYcfloat: mf = MFfloat; break; 3781 case TYcdouble: mf = MFdouble; break; 3782 case TYcldouble: break; 3783 default: assert(0); 3784 } 3785 switch (e.Eoper) 3786 { 3787 case OPvar: 3788 notreg(e); // never enregister this variable 3789 goto case OPind; 3790 3791 case OPind: 3792 push87(cdb); 3793 push87(cdb); 3794 switch (ty) 3795 { 3796 case TYcfloat: 3797 case TYcdouble: 3798 loadea(cdb,e,&cs,ESC(mf,1),0,0,0,0); // FLD var 3799 cs.IEV1.Voffset += sz; 3800 cdb.gen(&cs); 3801 break; 3802 3803 case TYcldouble: 3804 loadea(cdb,e,&cs,0xDB,5,0,0,0); // FLD var 3805 cs.IEV1.Voffset += sz; 3806 cdb.gen(&cs); 3807 break; 3808 3809 default: 3810 assert(0); 3811 } 3812 retregs = mST01; 3813 break; 3814 3815 case OPd_ld: 3816 case OPld_d: 3817 case OPf_d: 3818 case OPd_f: 3819 cload87(cdb,e.EV.E1, pretregs); 3820 freenode(e.EV.E1); 3821 return; 3822 3823 case OPconst: 3824 push87(cdb); 3825 push87(cdb); 3826 for (i = 0; i < 2; i++) 3827 { 3828 ldop = loadconst(e, i); 3829 if (ldop) 3830 { 3831 cdb.genf2(0xD9,ldop); // FLDx 3832 } 3833 else 3834 { 3835 assert(0); 3836 } 3837 } 3838 retregs = mST01; 3839 break; 3840 3841 default: 3842 debug elem_print(e); 3843 assert(0); 3844 } 3845 fixresult_complex87(cdb, e, retregs, pretregs); 3846 } 3847 3848 
/**********************************************
 * Load OPpair or OPrpair into mST01
 *
 * Both operands are evaluated into ST0 in turn; for OPrpair the two
 * values are exchanged afterwards.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the OPpair/OPrpair elem
 *      pretregs = where the result is wanted
 */
void loadPair87(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    const bool reversed = (e.Eoper == OPrpair);
    assert(e.Eoper == OPpair || reversed);

    elem *first = e.EV.E1;
    elem *second = e.EV.E2;

    regm_t st = mST0;
    codelem(cdb, first, &st, false);
    note87(first, 0, 0);                // remember where the first operand is
    codelem(cdb, second, &st, false);
    makesure87(cdb, first, 0, 1, 0);    // first operand must now be in ST(1)
    if (reversed)
        cdb.genf2(0xD9, 0xC8 + 1);      // FXCH ST(1)

    fixresult_complex87(cdb, e, mST01, pretregs);
}

/**********************************************
 * Round 80 bit precision to 32 or 64 bits.
 * OPtoprec
 *
 * When the result is wanted in ST0 the value is stored to floatreg as a
 * float or double and loaded back again.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the OPtoprec elem
 *      pretregs = where the result is wanted; if 0 only the operand is evaluated
 */
void cdtoprec(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    //printf("cdtoprec: *pretregs = %s\n", regm_str(*pretregs));
    if (*pretregs)
    {
        assert(config.inline8087);
        regm_t st0 = mST0;
        codelem(cdb, e.EV.E1, &st0, false);
        if (*pretregs & mST0)
        {
            const uint fmt = (_tysize[tybasic(e.Ety)] == FLOATSIZE) ? MFfloat : MFdouble;
            const ubyte escop = ESC(fmt,1);
            cdb.genfltreg(escop,3,0);   // FSTP float/double ptr fltreg
            genfwait(cdb);
            cdb.genfltreg(escop,0,0);   // FLD float/double ptr fltreg
        }
        fixresult87(cdb, e, st0, pretregs);
    }
    else
    {
        // No result wanted: just evaluate the operand
        codelem(cdb, e.EV.E1, pretregs, false);
    }
}

}