/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1984-1998 by Symantec
 *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod1.d, backend/cod1.d)
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod1.d
 */

module dmd.backend.cod1;

// The body of this module is compiled only when SCPP or MARS is set.
version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.rtlsym;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

extern (C++):

nothrow:

// Declared here, defined elsewhere.
int REGSIZE();

// Globals defined elsewhere; segfl and stackfl are indexed by FL value.
extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl;

/// Returns: a 32-bit mask with only bit `m` set.
private extern (D) uint mask(uint m)   { return 1 << m; }

/// Emit opcode 0x09 (x86 `OR r/m,reg`) through genregs(), passing `f` then `t`.
private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); }

/* array to convert from index register to r/m field */
/* A value of -1 marks a register that has no entry in the table. */
                                       /* AX CX DX BX SP BP SI DI       */
private __gshared const byte[8] regtorm32 =   [  0, 1, 2, 3,-1, 5, 6, 7 ];
                 __gshared const byte[8] regtorm   =   [ -1,-1,-1, 7,-1, 6, 4, 5 ];

// Prototype only; the definition is not in this part of the file.
targ_size_t paramsize(elem *e, tym_t tyf);
//void funccall(ref CodeBuilder cdb,elem *e,uint numpara,uint numalign,
//        regm_t *pretregs,regm_t keepmsk, bool usefuncarg);

/*********************************
 * Decide whether parameter `s` should stay in the register it was
 * passed in, rather than being handed to the register allocator.
 * Params:
 *      s = parameter Symbol
 * Returns:
 *      `true` if `s` is a register parameter (SCfastpar or SCshadowreg) and
 *      either optimization is off or `s` is not a register candidate
 */
bool regParamInPreg(Symbol* s)
{
    //printf("regPAramInPreg %s\n", s.Sident.ptr);
    if (s.Sclass != SCfastpar && s.Sclass != SCshadowreg)
        return false;                   // not passed in a register at all
    if (!(config.flags4 & CFG4optimized))
        return true;                    // not optimizing: always leave it there
    return !(s.Sflags & GTregcand);
}


/**************************
 * Test whether e is a 32 bit scaled index addressing mode, i.e. a
 * non-CSE left shift by a constant of at most 3 (after skipping commas).
 * Returns:
 *      0       not a scaled index addressing mode
 *      !=0     the value for ss in the SIB byte
 */

int isscaledindex(elem *e)
{
    assert(!I16);

    // The value of a comma expression is its right operand
    while (e.Eoper == OPcomma)
        e = e.EV.E2;

    if (e.Eoper != OPshl || e.Ecount || e.EV.E2.Eoper != OPconst)
        return 0;

    targ_uns ss = e.EV.E2.EV.Vuns;
    if (ss > 3)
        ss = 0;                         // shift too large for a SIB scale
    return ss;
}

/*********************************************
 * Generate code for an elem for which isscaledindex(e) returned non-zero.
 * The operand being shifted is loaded into a register from *pidxregs.
 */

/*private*/ void cdisscaledindex(ref CodeBuilder cdb,elem *e,regm_t *pidxregs,regm_t keepmsk)
{
    // Evaluate the left operand of each leading comma, asking for no result registers
    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
        regm_t noregs = 0;
        scodelem(cdb, e.EV.E1, &noregs, keepmsk, true);
        freenode(e);
    }

    assert(e.Eoper == OPshl);

    // Load index register with result of e.EV.E1
    scodelem(cdb, e.EV.E1, pidxregs, keepmsk, true);
    freenode(e.EV.E2);
    freenode(e);
}

/***********************************
 * Determine index if we can do two LEA instructions as a multiply.
129 * Returns: 130 * 0 can't do it 131 */ 132 133 enum 134 { 135 SSFLnobp = 1, /// can't have EBP in relconst 136 SSFLnobase1 = 2, /// no base register for first LEA 137 SSFLnobase = 4, /// no base register 138 SSFLlea = 8, /// can do it in one LEA 139 } 140 141 struct Ssindex 142 { 143 targ_uns product; 144 ubyte ss1; 145 ubyte ss2; 146 ubyte ssflags; /// SSFLxxxx 147 } 148 149 private __gshared const Ssindex[21] ssindex_array = 150 [ 151 { 0, 0, 0 }, // [0] is a place holder 152 153 { 3, 1, 0, SSFLnobp | SSFLlea }, 154 { 5, 2, 0, SSFLnobp | SSFLlea }, 155 { 9, 3, 0, SSFLnobp | SSFLlea }, 156 157 { 6, 1, 1, SSFLnobase }, 158 { 12, 1, 2, SSFLnobase }, 159 { 24, 1, 3, SSFLnobase }, 160 { 10, 2, 1, SSFLnobase }, 161 { 20, 2, 2, SSFLnobase }, 162 { 40, 2, 3, SSFLnobase }, 163 { 18, 3, 1, SSFLnobase }, 164 { 36, 3, 2, SSFLnobase }, 165 { 72, 3, 3, SSFLnobase }, 166 167 { 15, 2, 1, SSFLnobp }, 168 { 25, 2, 2, SSFLnobp }, 169 { 27, 3, 1, SSFLnobp }, 170 { 45, 3, 2, SSFLnobp }, 171 { 81, 3, 3, SSFLnobp }, 172 173 { 16, 3, 1, SSFLnobase1 | SSFLnobase }, 174 { 32, 3, 2, SSFLnobase1 | SSFLnobase }, 175 { 64, 3, 3, SSFLnobase1 | SSFLnobase }, 176 ]; 177 178 int ssindex(OPER op,targ_uns product) 179 { 180 if (op == OPshl) 181 product = 1 << product; 182 for (size_t i = 1; i < ssindex_array.length; i++) 183 { 184 if (ssindex_array[i].product == product) 185 return cast(int)i; 186 } 187 return 0; 188 } 189 190 /*************************************** 191 * Build an EA of the form disp[base][index*scale]. 
192 * Input: 193 * c struct to fill in 194 * base base register (-1 if none) 195 * index index register (-1 if none) 196 * scale scale factor - 1,2,4,8 197 * disp displacement 198 */ 199 200 void buildEA(code *c,int base,int index,int scale,targ_size_t disp) 201 { 202 ubyte rm; 203 ubyte sib; 204 ubyte rex = 0; 205 206 sib = 0; 207 if (!I16) 208 { uint ss; 209 210 assert(index != SP); 211 212 switch (scale) 213 { case 1: ss = 0; break; 214 case 2: ss = 1; break; 215 case 4: ss = 2; break; 216 case 8: ss = 3; break; 217 default: assert(0); 218 } 219 220 if (base == -1) 221 { 222 if (index == -1) 223 rm = modregrm(0,0,5); 224 else 225 { 226 rm = modregrm(0,0,4); 227 sib = modregrm(ss,index & 7,5); 228 if (index & 8) 229 rex |= REX_X; 230 } 231 } 232 else if (index == -1) 233 { 234 if (base == SP) 235 { 236 rm = modregrm(2, 0, 4); 237 sib = modregrm(0, 4, SP); 238 } 239 else 240 { rm = modregrm(2, 0, base & 7); 241 if (base & 8) 242 { rex |= REX_B; 243 if (base == R12) 244 { 245 rm = modregrm(2, 0, 4); 246 sib = modregrm(0, 4, 4); 247 } 248 } 249 } 250 } 251 else 252 { 253 rm = modregrm(2, 0, 4); 254 sib = modregrm(ss,index & 7,base & 7); 255 if (index & 8) 256 rex |= REX_X; 257 if (base & 8) 258 rex |= REX_B; 259 } 260 } 261 else 262 { 263 // -1 AX CX DX BX SP BP SI DI 264 static immutable ubyte[9][9] EA16rm = 265 [ 266 [ 0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85, ], // -1 267 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // AX 268 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // CX 269 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // DX 270 [ 0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81, ], // BX 271 [ 0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09, ], // SP 272 [ 0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83, ], // BP 273 [ 0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09, ], // SI 274 [ 0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09, ] // DI 275 ]; 276 277 assert(scale == 1); 278 rm = EA16rm[base + 1][index + 1]; 279 assert(rm != 9); 280 } 281 c.Irm = rm; 
282 c.Isib = sib; 283 c.Irex = rex; 284 c.IFL1 = FLconst; 285 c.IEV1.Vuns = cast(targ_uns)disp; 286 } 287 288 /********************************************* 289 * Build REX, modregrm and sib bytes 290 */ 291 292 uint buildModregrm(int mod, int reg, int rm) 293 { 294 uint m; 295 if (I16) 296 m = modregrm(mod, reg, rm); 297 else 298 { 299 if ((rm & 7) == SP && mod != 3) 300 m = (modregrm(0,4,SP) << 8) | modregrm(mod,reg & 7,4); 301 else 302 m = modregrm(mod,reg & 7,rm & 7); 303 if (reg & 8) 304 m |= REX_R << 16; 305 if (rm & 8) 306 m |= REX_B << 16; 307 } 308 return m; 309 } 310 311 /**************************************** 312 * Generate code for eecontext 313 */ 314 315 void genEEcode() 316 { 317 CodeBuilder cdb; 318 cdb.ctor(); 319 320 eecontext.EEin++; 321 regcon.immed.mval = 0; 322 regm_t retregs = 0; //regmask(eecontext.EEelem.Ety); 323 assert(EEStack.offset >= REGSIZE); 324 cod3_stackadj(cdb, cast(int)(EEStack.offset - REGSIZE)); 325 cdb.gen1(0x50 + SI); // PUSH ESI 326 cdb.genadjesp(cast(int)EEStack.offset); 327 gencodelem(cdb, eecontext.EEelem, &retregs, false); 328 code *c = cdb.finish(); 329 assignaddrc(c); 330 pinholeopt(c,null); 331 jmpaddr(c); 332 eecontext.EEcode = gen1(c, 0xCC); // INT 3 333 eecontext.EEin--; 334 } 335 336 337 /******************************************** 338 * Gen a save/restore sequence for mask of registers. 
339 * Params: 340 * regm = mask of registers to save 341 * cdbsave = save code appended here 342 * cdbrestore = restore code appended here 343 * Returns: 344 * amount of stack consumed 345 */ 346 347 uint gensaverestore(regm_t regm,ref CodeBuilder cdbsave,ref CodeBuilder cdbrestore) 348 { 349 //printf("gensaverestore2(%s)\n", regm_str(regm)); 350 regm &= mBP | mES | ALLREGS | XMMREGS | mST0 | mST01; 351 if (!regm) 352 return 0; 353 354 uint stackused = 0; 355 356 code *[regm.sizeof * 8] restore; 357 358 reg_t i; 359 for (i = 0; regm; i++) 360 { 361 if (regm & 1) 362 { 363 code *cs2; 364 if (i == ES && I16) 365 { 366 stackused += REGSIZE; 367 cdbsave.gen1(0x06); // PUSH ES 368 cs2 = gen1(null, 0x07); // POP ES 369 } 370 else if (i == ST0 || i == ST01) 371 { 372 CodeBuilder cdb; 373 cdb.ctor(); 374 gensaverestore87(1 << i, cdbsave, cdb); 375 cs2 = cdb.finish(); 376 } 377 else if (i >= XMM0 || I64 || cgstate.funcarg.size) 378 { uint idx; 379 regsave.save(cdbsave, i, &idx); 380 CodeBuilder cdb; 381 cdb.ctor(); 382 regsave.restore(cdb, i, idx); 383 cs2 = cdb.finish(); 384 } 385 else 386 { 387 stackused += REGSIZE; 388 cdbsave.gen1(0x50 + (i & 7)); // PUSH i 389 cs2 = gen1(null, 0x58 + (i & 7)); // POP i 390 if (i & 8) 391 { code_orrex(cdbsave.last(), REX_B); 392 code_orrex(cs2, REX_B); 393 } 394 } 395 restore[i] = cs2; 396 } 397 else 398 restore[i] = null; 399 regm >>= 1; 400 } 401 402 while (i) 403 { 404 code *c = restore[--i]; 405 if (c) 406 { 407 cdbrestore.append(c); 408 } 409 } 410 411 return stackused; 412 } 413 414 415 /**************************************** 416 * Clean parameters off stack. 
417 * Input: 418 * numpara amount to adjust stack pointer 419 * keepmsk mask of registers to not destroy 420 */ 421 422 void genstackclean(ref CodeBuilder cdb,uint numpara,regm_t keepmsk) 423 { 424 //dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean); 425 if (numpara && (cgstate.stackclean || STACKALIGN >= 16)) 426 { 427 /+ 428 if (0 && // won't work if operand of scodelem 429 numpara == stackpush && // if this is all those pushed 430 needframe && // and there will be a BP 431 !config.windows && 432 !(regcon.mvar & fregsaved) // and no registers will be pushed 433 ) 434 genregs(cdb,0x89,BP,SP); // MOV SP,BP 435 else 436 +/ 437 { 438 regm_t scratchm = 0; 439 440 if (numpara == REGSIZE && config.flags4 & CFG4space) 441 { 442 scratchm = ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar; 443 } 444 445 if (scratchm) 446 { 447 reg_t r; 448 allocreg(cdb, &scratchm, &r, TYint); 449 cdb.gen1(0x58 + r); // POP r 450 } 451 else 452 cod3_stackadj(cdb, -numpara); 453 } 454 stackpush -= numpara; 455 cdb.genadjesp(-numpara); 456 } 457 } 458 459 /********************************* 460 * Generate code for a logical expression. 
461 * Input: 462 * e elem 463 * jcond 464 * bit 1 if true then goto jump address if e 465 * if false then goto jump address if !e 466 * 2 don't call save87() 467 * fltarg FLcode or FLblock, flavor of target if e evaluates to jcond 468 * targ either code or block pointer to destination 469 */ 470 471 void logexp(ref CodeBuilder cdb, elem *e, int jcond, uint fltarg, code *targ) 472 { 473 //printf("logexp(e = %p, jcond = %d)\n", e, jcond); 474 int no87 = (jcond & 2) == 0; 475 docommas(cdb, &e); // scan down commas 476 cgstate.stackclean++; 477 478 code* c, ce; 479 if (!OTleaf(e.Eoper) && !e.Ecount) // if operator and not common sub 480 { 481 switch (e.Eoper) 482 { 483 case OPoror: 484 { 485 con_t regconsave; 486 if (jcond & 1) 487 { 488 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 489 regconsave = regcon; 490 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 491 } 492 else 493 { 494 code *cnop = gennop(null); 495 logexp(cdb, e.EV.E1, jcond | 1, FLcode, cnop); 496 regconsave = regcon; 497 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 498 cdb.append(cnop); 499 } 500 andregcon(®consave); 501 freenode(e); 502 cgstate.stackclean--; 503 return; 504 } 505 506 case OPandand: 507 { 508 con_t regconsave; 509 if (jcond & 1) 510 { 511 code *cnop = gennop(null); // a dummy target address 512 logexp(cdb, e.EV.E1, jcond & ~1, FLcode, cnop); 513 regconsave = regcon; 514 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 515 cdb.append(cnop); 516 } 517 else 518 { 519 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 520 regconsave = regcon; 521 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 522 } 523 andregcon(®consave); 524 freenode(e); 525 cgstate.stackclean--; 526 return; 527 } 528 529 case OPnot: 530 jcond ^= 1; 531 goto case OPbool; 532 533 case OPbool: 534 case OPs8_16: 535 case OPu8_16: 536 case OPs16_32: 537 case OPu16_32: 538 case OPs32_64: 539 case OPu32_64: 540 case OPu32_d: 541 case OPd_ld: 542 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 543 freenode(e); 544 cgstate.stackclean--; 545 return; 546 547 case 
OPcond: 548 { 549 code *cnop2 = gennop(null); // addresses of start of leaves 550 code *cnop = gennop(null); 551 logexp(cdb, e.EV.E1, false, FLcode, cnop2); // eval condition 552 con_t regconold = regcon; 553 logexp(cdb, e.EV.E2.EV.E1, jcond, fltarg, targ); 554 genjmp(cdb, JMP, FLcode, cast(block *) cnop); // skip second leaf 555 556 con_t regconsave = regcon; 557 regcon = regconold; 558 559 cdb.append(cnop2); 560 logexp(cdb, e.EV.E2.EV.E2, jcond, fltarg, targ); 561 andregcon(®conold); 562 andregcon(®consave); 563 freenode(e.EV.E2); 564 freenode(e); 565 cdb.append(cnop); 566 cgstate.stackclean--; 567 return; 568 } 569 570 default: 571 break; 572 } 573 } 574 575 /* Special code for signed long compare. 576 * Not necessary for I64 until we do cents. 577 */ 578 if (OTrel2(e.Eoper) && // if < <= >= > 579 !e.Ecount && 580 ( (I16 && tybasic(e.EV.E1.Ety) == TYlong && tybasic(e.EV.E2.Ety) == TYlong) || 581 (I32 && tybasic(e.EV.E1.Ety) == TYllong && tybasic(e.EV.E2.Ety) == TYllong)) 582 ) 583 { 584 longcmp(cdb, e, jcond != 0, fltarg, targ); 585 cgstate.stackclean--; 586 return; 587 } 588 589 regm_t retregs = mPSW; // return result in flags 590 opcode_t op = jmpopcode(e); // get jump opcode 591 if (!(jcond & 1)) 592 op ^= 0x101; // toggle jump condition(s) 593 codelem(cdb, e, &retregs, true); // evaluate elem 594 if (no87) 595 cse_flush(cdb,no87); // flush CSE's to memory 596 genjmp(cdb, op, fltarg, cast(block *) targ); // generate jmp instruction 597 cgstate.stackclean--; 598 } 599 600 /****************************** 601 * Routine to aid in setting things up for gen(). 602 * Look for common subexpression. 603 * Can handle indirection operators, but not if they're common subs. 
 * If e is a CSE whose value is still held in a register, that register
 * is encoded as the operand; otherwise the EA comes from getlvalue().
 * Input:
 *      e       -> elem where we get some of the data from
 *      cs      -> partially filled code to add
 *      op      =  opcode
 *      reg     =  reg field of (mod reg r/m)
 *      offset  =  data to be added to Voffset field
 *      keepmsk =  mask of registers we must not destroy
 *      desmsk  =  mask of registers destroyed by executing the instruction
 * Returns:
 *      nothing; the generated instruction is appended to cdb
 */

void loadea(ref CodeBuilder cdb,elem *e,code *cs,uint op,uint reg,targ_size_t offset,
        regm_t keepmsk,regm_t desmsk)
{
    code* c, cg, cd;        // NOTE(review): none of these are used in this function

debug
    if (debugw)
        printf("loadea: e=%p cs=%p op=x%x reg=%s offset=%lld keepmsk=%s desmsk=%s\n",
               e, cs, op, regstring[reg], cast(ulong)offset, regm_str(keepmsk), regm_str(desmsk));
    assert(e);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.Iop = op;
    tym_t tym = e.Ety;
    int sz = tysize(tym);

    /* Determine if location we want to get is in a register. If so,      */
    /* substitute the register for the EA.                                */
    /* Note that operators don't go through this. CSE'd operators are     */
    /* picked up by comsub().                                             */
    if (e.Ecount &&                       /* if cse                       */
        e.Ecount != e.Ecomsub &&          /* and cse was generated        */
        op != LEA && op != 0xC4 &&        /* and not an LEA or LES        */
        (op != 0xFF || reg != 3) &&       /* and not CALLF MEM16          */
        (op & 0xFFF8) != 0xD8)            // and not 8087 opcode
    {
        assert(OTleaf(e.Eoper));                /* can't handle this            */
        regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs
        if (op == 0xFF && reg == 6)
            rm &= ~XMMREGS;             // can't PUSH an XMM register
        if (sz > REGSIZE)               // value is in 2 or 4 registers
        {
            if (I16 && sz == 8)         // value is in 4 registers
            {
                // offset / 2 selects which of the four word registers is wanted
                static immutable regm_t[4] rmask = [ mDX,mCX,mBX,mAX ];
                rm &= rmask[cast(size_t)(offset >> 1)];
            }
            else if (offset)
                rm &= mMSW;             /* only high words      */
            else
                rm &= mLSW;             /* only low words       */
        }
        // Scan the candidate registers, lowest first, for one holding e
        for (uint i = 0; rm; i++)
        {
            if (mask(i) & rm)
            {
                if (regcon.cse.value[i] == e && // if register has elem
                    /* watch out for a CWD destroying DX        */
                    !(i == DX && op == 0xF7 && desmsk & mDX))
                {
                    /* if ES, then it can only be a load    */
                    if (i == ES)
                    {
                        if (op != 0x8B)
                            break;      // not a load
                        cs.Iop = 0x8C;  /* MOV reg,ES   */
                        cs.Irm = modregrm(3, 0, reg & 7);
                        if (reg & 8)
                            code_orrex(cs, REX_B);
                    }
                    else    // XXX reg,i
                    {
                        cs.Irm = modregrm(3, reg & 7, i & 7);
                        if (reg & 8)
                            cs.Irex |= REX_R;
                        if (i & 8)
                            cs.Irex |= REX_B;
                        if (sz == 1 && I64 && (i >= 4 || reg >= 4))
                            cs.Irex |= REX;
                        if (I64 && (sz == 8 || sz == 16))
                            cs.Irex |= REX_W;
                    }
                    goto L2;            // operand is a register; skip the memory EA setup
                }
                rm &= ~mask(i);         // register i does not hold e; drop it from the candidates
            }
        }
    }

    // Not available in a register: compute a memory addressing mode
    getlvalue(cdb, cs, e, keepmsk);
    if (offset == REGSIZE)
        getlvalue_msw(cs);
    else
        cs.IEV1.Voffset += offset;
    if (I64)
    {
        if (reg >= 4 && sz == 1)               // if byte register
            // Can only address those 8 bit registers if a REX byte is present
            cs.Irex |= REX;
        if ((op & 0xFFFFFFF8) == 0xD8)
            cs.Irex &= ~REX_W;                 // not needed for x87 ops
        if (mask(reg) & XMMREGS &&
            (op == LODSD || op == STOSD))
            cs.Irex &= ~REX_W;                 // not needed for xmm ops
    }
    code_newreg(cs, reg);                         // OR in reg field
    if (!I16)
    {
        if (reg == 6 && op == 0xFF ||             /* don't PUSH a word    */
            op == MOVZXw || op == MOVSXw ||         /* MOVZX/MOVSX          */
            (op & 0xFFF8) == 0xD8 ||                /* 8087 instructions    */
            op == LEA)                              /* LEA                  */
        {
            cs.Iflags &= ~CFopsize;
            if (reg == 6 && op == 0xFF)         // if PUSH
                cs.Irex &= ~REX_W;             // REX is ignored for PUSH anyway
        }
    }
    else if ((op & 0xFFF8) == 0xD8 && ADDFWAIT())
        cs.Iflags |= CFwait;
L2:
    getregs(cdb, desmsk);                  // save any regs we destroy

    /* KLUDGE! fix up DX for divide instructions */
    if (op == 0xF7 && desmsk == (mAX|mDX))        /* if we need to fix DX */
    {
        if (reg == 7)                           /* if IDIV              */
        {
            cdb.gen1(0x99);                     // CWD
            if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (reg == 6)                      // if DIV
            genregs(cdb, 0x33, DX, DX);        // XOR DX,DX
    }

    // Eliminate MOV reg,reg
    if ((cs.Iop & ~3) == 0x88 &&
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        // Rebuild the full r/m register number, including the REX_B extension
        uint r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == reg)
            cs.Iop = NOP;
    }

    // Eliminate MOV xmmreg,xmmreg
    if ((cs.Iop & ~(LODSD ^ STOSS)) == LODSD &&    // detect LODSD, LODSS, STOSD, STOSS
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        reg_t r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == (reg - XMM0))
            cs.Iop = NOP;
    }

    cdb.gen(cs);
}


/**************************
 * Get addressing mode.
 * Params:
 *      idxregs = mask containing the register to address through
 * Returns:
 *      the mod r/m byte in bits 0..7; in the R12 case also a SIB byte in
 *      bits 8..15 and REX_B shifted left by 16. setaddrmode() unpacks the
 *      result using that layout.
 */

uint getaddrmode(regm_t idxregs)
{
    uint mode;

    if (I16)
    {
        // Reached only if none of BX, DI, SI is in idxregs
        static ubyte error() { assert(0); }

        mode =  (idxregs & mBX) ? modregrm(2,0,7) :     /* [BX] */
                (idxregs & mDI) ? modregrm(2,0,5):      /* [DI] */
                (idxregs & mSI) ? modregrm(2,0,4):      /* [SI] */
                                  error();
    }
    else
    {
        const reg = findreg(idxregs & (ALLREGS | mBP));
        if (reg == R12)
            // R12 is encoded with a SIB byte plus REX_B
            mode = (REX_B << 16) | (modregrm(0,4,4) << 8) | modregrm(2,0,4);
        else
            mode = modregrmx(2,0,reg);
    }
    return mode;
}

/**************************
 * Store the addressing mode for idxregs, as computed by getaddrmode(),
 * into c: Irm and Isib are overwritten; REX_B in Irex is cleared and
 * then whatever is in bits 16 and up of the mode is OR'd in.
 */
void setaddrmode(code *c, regm_t idxregs)
{
    uint mode = getaddrmode(idxregs);
    c.Irm = mode & 0xFF;
    c.Isib = (mode >> 8) & 0xFF;
    c.Irex &= ~REX_B;
    c.Irex |= mode >> 16;
}

/**********************************************
 * Adjust the EA in c, moving forward by REGSIZE.
 * For a register operand (IFL1 == FLreg) the r/m field and REX_B are
 * switched to the symbol's Sregmsw; otherwise REGSIZE is added to the
 * offset.
 */

void getlvalue_msw(code *c)
{
    if (c.IFL1 == FLreg)
    {
        const regmsw = c.IEV1.Vsym.Sregmsw;
        c.Irm = (c.Irm & ~7) | (regmsw & 7);
        if (regmsw & 8)
            c.Irex |= REX_B;
        else
            c.Irex &= ~REX_B;
    }
    else
        c.IEV1.Voffset += REGSIZE;
}

/**********************************************
 * Inverse of getlvalue_msw(): adjust the EA in c, moving back by REGSIZE.
 * For a register operand (IFL1 == FLreg) the r/m field and REX_B are
 * switched to the symbol's Sreglsw; otherwise REGSIZE is subtracted from
 * the offset.
 */

void getlvalue_lsw(code *c)
{
    if (c.IFL1 == FLreg)
    {
        const reglsw = c.IEV1.Vsym.Sreglsw;
        c.Irm = (c.Irm & ~7) | (reglsw & 7);
        if (reglsw & 8)
            c.Irex |= REX_B;
        else
            c.Irex &= ~REX_B;
    }
    else
        c.IEV1.Voffset -= REGSIZE;
}

/******************
 * Compute addressing mode.
 * Generate & return sequence of code (if any).
 * Return in cs the info on it.
845 * Input: 846 * pcs -> where to store data about addressing mode 847 * e -> the lvalue elem 848 * keepmsk mask of registers we must not destroy or use 849 * if (keepmsk & RMstore), this will be only a store operation 850 * into the lvalue 851 * if (keepmsk & RMload), this will be a read operation only 852 */ 853 854 void getlvalue(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk) 855 { 856 uint fl, f, opsave; 857 elem* e1, e11, e12; 858 bool e1isadd, e1free; 859 reg_t reg; 860 tym_t e1ty; 861 Symbol* s; 862 863 //printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); 864 //elem_print(e); 865 assert(e); 866 elem_debug(e); 867 if (e.Eoper == OPvar || e.Eoper == OPrelconst) 868 { 869 s = e.EV.Vsym; 870 fl = s.Sfl; 871 if (tyfloating(s.ty())) 872 objmod.fltused(); 873 } 874 else 875 fl = FLoper; 876 pcs.IFL1 = cast(ubyte)fl; 877 pcs.Iflags = CFoff; /* only want offsets */ 878 pcs.Irex = 0; 879 pcs.IEV1.Voffset = 0; 880 881 tym_t ty = e.Ety; 882 uint sz = tysize(ty); 883 if (tyfloating(ty)) 884 objmod.fltused(); 885 if (I64 && (sz == 8 || sz == 16) && !tyvector(ty)) 886 pcs.Irex |= REX_W; 887 if (!I16 && sz == SHORTSIZE) 888 pcs.Iflags |= CFopsize; 889 if (ty & mTYvolatile) 890 pcs.Iflags |= CFvolatile; 891 892 switch (fl) 893 { 894 case FLoper: 895 debug 896 if (debugw) printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); 897 898 switch (e.Eoper) 899 { 900 case OPadd: // this way when we want to do LEA 901 e1 = e; 902 e1free = false; 903 e1isadd = true; 904 break; 905 906 case OPind: 907 case OPpostinc: // when doing (*p++ = ...) 908 case OPpostdec: // when doing (*p-- = ...) 
909 case OPbt: 910 case OPbtc: 911 case OPbtr: 912 case OPbts: 913 case OPvecfill: 914 e1 = e.EV.E1; 915 e1free = true; 916 e1isadd = e1.Eoper == OPadd; 917 break; 918 919 default: 920 printf("function: %s\n", funcsym_p.Sident.ptr); 921 elem_print(e); 922 assert(0); 923 } 924 e1ty = tybasic(e1.Ety); 925 if (e1isadd) 926 { 927 e12 = e1.EV.E2; 928 e11 = e1.EV.E1; 929 } 930 931 /* First see if we can replace *(e+&v) with 932 * MOV idxreg,e 933 * EA = [ES:] &v+idxreg 934 */ 935 f = FLconst; 936 937 /* Is address of `s` relative to RIP ? 938 */ 939 static bool relativeToRIP(Symbol* s) 940 { 941 if (!I64) 942 return false; 943 if (config.exe == EX_WIN64) 944 return true; 945 if (config.flags3 & CFG3pie) 946 { 947 if (s.Sfl == FLtlsdata || s.ty() & mTYthread) 948 { 949 if (s.Sclass == SCglobal || s.Sclass == SCstatic || s.Sclass == SClocstat) 950 return false; 951 } 952 return true; 953 } 954 else 955 return (config.flags3 & CFG3pic) != 0; 956 } 957 958 if (e1isadd && 959 ((e12.Eoper == OPrelconst && 960 !relativeToRIP(e12.EV.Vsym) && 961 (f = el_fl(e12)) != FLfardata 962 ) || 963 (e12.Eoper == OPconst && !I16 && !e1.Ecount && (!I64 || el_signx32(e12)))) && 964 e1.Ecount == e1.Ecomsub && 965 (!e1.Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) && 966 tysize(e11.Ety) == REGSIZE 967 ) 968 { 969 uint t; /* component of r/m field */ 970 int ss; 971 int ssi; 972 973 if (e12.Eoper == OPrelconst) 974 f = el_fl(e12); 975 /*assert(datafl[f]);*/ /* what if addr of func? */ 976 if (!I16) 977 { /* Any register can be an index register */ 978 regm_t idxregs = allregs & ~keepmsk; 979 assert(idxregs); 980 981 /* See if e1.EV.E1 can be a scaled index */ 982 ss = isscaledindex(e11); 983 if (ss) 984 { 985 /* Load index register with result of e11.EV.E1 */ 986 cdisscaledindex(cdb, e11, &idxregs, keepmsk); 987 reg = findreg(idxregs); 988 { 989 t = stackfl[f] ? 
2 : 0; 990 pcs.Irm = modregrm(t, 0, 4); 991 pcs.Isib = modregrm(ss, reg & 7, 5); 992 if (reg & 8) 993 pcs.Irex |= REX_X; 994 } 995 } 996 else if ((e11.Eoper == OPmul || e11.Eoper == OPshl) && 997 !e11.Ecount && 998 e11.EV.E2.Eoper == OPconst && 999 (ssi = ssindex(e11.Eoper, e11.EV.E2.EV.Vuns)) != 0 1000 ) 1001 { 1002 regm_t scratchm; 1003 1004 char ssflags = ssindex_array[ssi].ssflags; 1005 if (ssflags & SSFLnobp && stackfl[f]) 1006 goto L6; 1007 1008 // Load index register with result of e11.EV.E1 1009 scodelem(cdb, e11.EV.E1, &idxregs, keepmsk, true); 1010 reg = findreg(idxregs); 1011 1012 int ss1 = ssindex_array[ssi].ss1; 1013 if (ssflags & SSFLlea) 1014 { 1015 assert(!stackfl[f]); 1016 pcs.Irm = modregrm(2,0,4); 1017 pcs.Isib = modregrm(ss1, reg & 7, reg & 7); 1018 if (reg & 8) 1019 pcs.Irex |= REX_X | REX_B; 1020 } 1021 else 1022 { 1023 int rbase; 1024 reg_t r; 1025 1026 scratchm = ALLREGS & ~keepmsk; 1027 allocreg(cdb, &scratchm, &r, TYint); 1028 1029 if (ssflags & SSFLnobase1) 1030 { 1031 t = 0; 1032 rbase = 5; 1033 } 1034 else 1035 { 1036 t = 0; 1037 rbase = reg; 1038 if (rbase == BP || rbase == R13) 1039 { 1040 static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1]; 1041 1042 // IMUL r,BP,imm32 1043 cdb.genc2(0x69, modregxrmx(3, r, rbase), imm32[ss1]); 1044 goto L7; 1045 } 1046 } 1047 1048 cdb.gen2sib(LEA, modregxrm(t, r, 4), modregrm(ss1, reg & 7 ,rbase & 7)); 1049 if (reg & 8) 1050 code_orrex(cdb.last(), REX_X); 1051 if (rbase & 8) 1052 code_orrex(cdb.last(), REX_B); 1053 if (I64) 1054 code_orrex(cdb.last(), REX_W); 1055 1056 if (ssflags & SSFLnobase1) 1057 { 1058 cdb.last().IFL1 = FLconst; 1059 cdb.last().IEV1.Vuns = 0; 1060 } 1061 L7: 1062 if (ssflags & SSFLnobase) 1063 { 1064 t = stackfl[f] ? 
2 : 0; 1065 rbase = 5; 1066 } 1067 else 1068 { 1069 t = 2; 1070 rbase = r; 1071 assert(rbase != BP); 1072 } 1073 pcs.Irm = modregrm(t, 0, 4); 1074 pcs.Isib = modregrm(ssindex_array[ssi].ss2, r & 7, rbase & 7); 1075 if (r & 8) 1076 pcs.Irex |= REX_X; 1077 if (rbase & 8) 1078 pcs.Irex |= REX_B; 1079 } 1080 freenode(e11.EV.E2); 1081 freenode(e11); 1082 } 1083 else 1084 { 1085 L6: 1086 /* Load index register with result of e11 */ 1087 scodelem(cdb, e11, &idxregs, keepmsk, true); 1088 setaddrmode(pcs, idxregs); 1089 if (stackfl[f]) /* if we need [EBP] too */ 1090 { 1091 uint idx = pcs.Irm & 7; 1092 if (pcs.Irex & REX_B) 1093 pcs.Irex = (pcs.Irex & ~REX_B) | REX_X; 1094 pcs.Isib = modregrm(0, idx, BP); 1095 pcs.Irm = modregrm(2, 0, 4); 1096 } 1097 } 1098 } 1099 else 1100 { 1101 regm_t idxregs = IDXREGS & ~keepmsk; /* only these can be index regs */ 1102 assert(idxregs); 1103 if (stackfl[f]) /* if stack data type */ 1104 { 1105 idxregs &= mSI | mDI; /* BX can't index off stack */ 1106 if (!idxregs) goto L1; /* index regs aren't avail */ 1107 t = 6; /* [BP+SI+disp] */ 1108 } 1109 else 1110 t = 0; /* [SI + disp] */ 1111 scodelem(cdb, e11, &idxregs, keepmsk, true); // load idx reg 1112 pcs.Irm = cast(ubyte)(getaddrmode(idxregs) ^ t); 1113 } 1114 if (f == FLpara) 1115 refparam = true; 1116 else if (f == FLauto || f == FLbprel || f == FLfltreg || f == FLfast) 1117 reflocal = true; 1118 else if (f == FLcsdata || tybasic(e12.Ety) == TYcptr) 1119 pcs.Iflags |= CFcs; 1120 else 1121 assert(f != FLreg); 1122 pcs.IFL1 = cast(ubyte)f; 1123 if (f != FLconst) 1124 pcs.IEV1.Vsym = e12.EV.Vsym; 1125 pcs.IEV1.Voffset = e12.EV.Voffset; /* += ??? 
*/ 1126 1127 /* If e1 is a CSE, we must generate an addressing mode */ 1128 /* but also leave EA in registers so others can use it */ 1129 if (e1.Ecount) 1130 { 1131 uint flagsave; 1132 1133 regm_t idxregs = IDXREGS & ~keepmsk; 1134 allocreg(cdb, &idxregs, ®, TYoffset); 1135 1136 /* If desired result is a far pointer, we'll have */ 1137 /* to load another register with the segment of v */ 1138 if (e1ty == TYfptr) 1139 { 1140 reg_t msreg; 1141 1142 idxregs |= mMSW & ALLREGS & ~keepmsk; 1143 allocreg(cdb, &idxregs, &msreg, TYfptr); 1144 msreg = findregmsw(idxregs); 1145 /* MOV msreg,segreg */ 1146 genregs(cdb, 0x8C, segfl[f], msreg); 1147 } 1148 opsave = pcs.Iop; 1149 flagsave = pcs.Iflags; 1150 ubyte rexsave = pcs.Irex; 1151 pcs.Iop = LEA; 1152 code_newreg(pcs, reg); 1153 if (!I16) 1154 pcs.Iflags &= ~CFopsize; 1155 if (I64) 1156 pcs.Irex |= REX_W; 1157 cdb.gen(pcs); // LEA idxreg,EA 1158 cssave(e1,idxregs,true); 1159 if (!I16) 1160 { 1161 pcs.Iflags = flagsave; 1162 pcs.Irex = rexsave; 1163 } 1164 if (stackfl[f] && (config.wflags & WFssneds)) // if pointer into stack 1165 pcs.Iflags |= CFss; // add SS: override 1166 pcs.Iop = opsave; 1167 pcs.IFL1 = FLoffset; 1168 pcs.IEV1.Vuns = 0; 1169 setaddrmode(pcs, idxregs); 1170 } 1171 freenode(e12); 1172 if (e1free) 1173 freenode(e1); 1174 goto Lptr; 1175 } 1176 1177 L1: 1178 1179 /* The rest of the cases could be a far pointer */ 1180 1181 regm_t idxregs; 1182 idxregs = (I16 ? 
IDXREGS : allregs) & ~keepmsk; // only these can be index regs 1183 assert(idxregs); 1184 if (!I16 && 1185 (sz == REGSIZE || (I64 && sz == 4)) && 1186 keepmsk & RMstore) 1187 idxregs |= regcon.mvar; 1188 1189 switch (e1ty) 1190 { 1191 case TYfptr: /* if far pointer */ 1192 case TYhptr: 1193 idxregs = (mES | IDXREGS) & ~keepmsk; // need segment too 1194 assert(idxregs & mES); 1195 pcs.Iflags |= CFes; /* ES segment override */ 1196 break; 1197 1198 case TYsptr: /* if pointer to stack */ 1199 if (config.wflags & WFssneds) // if SS != DS 1200 pcs.Iflags |= CFss; /* then need SS: override */ 1201 break; 1202 1203 case TYfgPtr: 1204 if (I32) 1205 pcs.Iflags |= CFgs; 1206 else if (I64) 1207 pcs.Iflags |= CFfs; 1208 else 1209 assert(0); 1210 break; 1211 1212 case TYcptr: /* if pointer to code */ 1213 pcs.Iflags |= CFcs; /* then need CS: override */ 1214 break; 1215 1216 default: 1217 break; 1218 } 1219 pcs.IFL1 = FLoffset; 1220 pcs.IEV1.Vuns = 0; 1221 1222 /* see if we can replace *(e+c) with 1223 * MOV idxreg,e 1224 * [MOV ES,segment] 1225 * EA = [ES:]c[idxreg] 1226 */ 1227 if (e1isadd && e12.Eoper == OPconst && 1228 (!I64 || el_signx32(e12)) && 1229 (tysize(e12.Ety) == REGSIZE || (I64 && tysize(e12.Ety) == 4)) && 1230 (!e1.Ecount || !e1free) 1231 ) 1232 { 1233 int ss; 1234 1235 pcs.IEV1.Vuns = e12.EV.Vuns; 1236 freenode(e12); 1237 if (e1free) freenode(e1); 1238 if (!I16 && e11.Eoper == OPadd && !e11.Ecount && 1239 tysize(e11.Ety) == REGSIZE) 1240 { 1241 e12 = e11.EV.E2; 1242 e11 = e11.EV.E1; 1243 e1 = e1.EV.E1; 1244 e1free = true; 1245 goto L4; 1246 } 1247 if (!I16 && (ss = isscaledindex(e11)) != 0) 1248 { // (v * scale) + const 1249 cdisscaledindex(cdb, e11, &idxregs, keepmsk); 1250 reg = findreg(idxregs); 1251 pcs.Irm = modregrm(0, 0, 4); 1252 pcs.Isib = modregrm(ss, reg & 7, 5); 1253 if (reg & 8) 1254 pcs.Irex |= REX_X; 1255 } 1256 else 1257 { 1258 scodelem(cdb, e11, &idxregs, keepmsk, true); // load index reg 1259 setaddrmode(pcs, idxregs); 1260 } 1261 goto Lptr; 
1262 } 1263 1264 /* Look for *(v1 + v2) 1265 * EA = [v1][v2] 1266 */ 1267 1268 if (!I16 && e1isadd && (!e1.Ecount || !e1free) && 1269 (_tysize[e1ty] == REGSIZE || (I64 && _tysize[e1ty] == 4))) 1270 { 1271 L4: 1272 regm_t idxregs2; 1273 uint base, index; 1274 1275 // Look for *(v1 + v2 << scale) 1276 int ss = isscaledindex(e12); 1277 if (ss) 1278 { 1279 scodelem(cdb, e11, &idxregs, keepmsk, true); 1280 idxregs2 = allregs & ~(idxregs | keepmsk); 1281 cdisscaledindex(cdb, e12, &idxregs2, keepmsk | idxregs); 1282 } 1283 1284 // Look for *(v1 << scale + v2) 1285 else if ((ss = isscaledindex(e11)) != 0) 1286 { 1287 idxregs2 = idxregs; 1288 cdisscaledindex(cdb, e11, &idxregs2, keepmsk); 1289 idxregs = allregs & ~(idxregs2 | keepmsk); 1290 scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true); 1291 } 1292 // Look for *(((v1 << scale) + c1) + v2) 1293 else if (e11.Eoper == OPadd && !e11.Ecount && 1294 e11.EV.E2.Eoper == OPconst && 1295 (ss = isscaledindex(e11.EV.E1)) != 0 1296 ) 1297 { 1298 pcs.IEV1.Vuns = e11.EV.E2.EV.Vuns; 1299 idxregs2 = idxregs; 1300 cdisscaledindex(cdb, e11.EV.E1, &idxregs2, keepmsk); 1301 idxregs = allregs & ~(idxregs2 | keepmsk); 1302 scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true); 1303 freenode(e11.EV.E2); 1304 freenode(e11); 1305 } 1306 else 1307 { 1308 scodelem(cdb, e11, &idxregs, keepmsk, true); 1309 idxregs2 = allregs & ~(idxregs | keepmsk); 1310 scodelem(cdb, e12, &idxregs2, keepmsk | idxregs, true); 1311 } 1312 base = findreg(idxregs); 1313 index = findreg(idxregs2); 1314 pcs.Irm = modregrm(2, 0, 4); 1315 pcs.Isib = modregrm(ss, index & 7, base & 7); 1316 if (index & 8) 1317 pcs.Irex |= REX_X; 1318 if (base & 8) 1319 pcs.Irex |= REX_B; 1320 if (e1free) 1321 freenode(e1); 1322 1323 goto Lptr; 1324 } 1325 1326 /* give up and replace *e1 with 1327 * MOV idxreg,e 1328 * EA = 0[idxreg] 1329 * pinholeopt() will usually correct the 0, we need it in case 1330 * we have a pointer to a long and need an offset to the second 1331 * word. 
1332 */ 1333 1334 assert(e1free); 1335 scodelem(cdb, e1, &idxregs, keepmsk, true); // load index register 1336 setaddrmode(pcs, idxregs); 1337 Lptr: 1338 if (config.flags3 & CFG3ptrchk) 1339 cod3_ptrchk(cdb, pcs, keepmsk); // validate pointer code 1340 break; 1341 1342 case FLdatseg: 1343 assert(0); 1344 static if (0) 1345 { 1346 pcs.Irm = modregrm(0, 0, BPRM); 1347 pcs.IEVpointer1 = e.EVpointer; 1348 break; 1349 } 1350 1351 case FLfltreg: 1352 reflocal = true; 1353 pcs.Irm = modregrm(2, 0, BPRM); 1354 pcs.IEV1.Vint = 0; 1355 break; 1356 1357 case FLreg: 1358 goto L2; 1359 1360 case FLpara: 1361 if (s.Sclass == SCshadowreg) 1362 goto case FLfast; 1363 Lpara: 1364 refparam = true; 1365 pcs.Irm = modregrm(2, 0, BPRM); 1366 goto L2; 1367 1368 case FLauto: 1369 case FLfast: 1370 if (regParamInPreg(s)) 1371 { 1372 regm_t pregm = s.Spregm(); 1373 /* See if the parameter is still hanging about in a register, 1374 * and so can we load from that register instead. 1375 */ 1376 if (regcon.params & pregm /*&& s.Spreg2 == NOREG && !(pregm & XMMREGS)*/) 1377 { 1378 if (keepmsk & RMload && !anyiasm) 1379 { 1380 auto voffset = e.EV.Voffset; 1381 if (sz <= REGSIZE) 1382 { 1383 const reg_t preg = (voffset >= REGSIZE) ? s.Spreg2 : s.Spreg; 1384 if (voffset >= REGSIZE) 1385 voffset -= REGSIZE; 1386 1387 /* preg could be NOREG if it's a variadic function and we're 1388 * in Win64 shadow regs and we're offsetting to get to the start 1389 * of the variadic args. 
1390 */ 1391 if (preg != NOREG && regcon.params & mask(preg)) 1392 { 1393 //printf("sz %d, preg %s, Voffset %d\n", cast(int)sz, regm_str(mask(preg)), cast(int)voffset); 1394 if (mask(preg) & XMMREGS && sz != REGSIZE) 1395 { 1396 /* The following fails with this from std.math on Linux64: 1397 void main() 1398 { 1399 alias T = float; 1400 T x = T.infinity; 1401 T e = T.infinity; 1402 int eptr; 1403 T v = frexp(x, eptr); 1404 assert(isIdentical(e, v)); 1405 } 1406 */ 1407 } 1408 else if (voffset == 0) 1409 { 1410 pcs.Irm = modregrm(3, 0, preg & 7); 1411 if (preg & 8) 1412 pcs.Irex |= REX_B; 1413 if (I64 && sz == 1 && preg >= 4) 1414 pcs.Irex |= REX; 1415 regcon.used |= mask(preg); 1416 break; 1417 } 1418 else if (voffset == 1 && sz == 1 && preg < 4) 1419 { 1420 pcs.Irm = modregrm(3, 0, 4 | preg); // use H register 1421 regcon.used |= mask(preg); 1422 break; 1423 } 1424 } 1425 } 1426 } 1427 else 1428 regcon.params &= ~pregm; 1429 } 1430 } 1431 if (s.Sclass == SCshadowreg) 1432 goto Lpara; 1433 goto case FLbprel; 1434 1435 case FLbprel: 1436 reflocal = true; 1437 pcs.Irm = modregrm(2, 0, BPRM); 1438 goto L2; 1439 1440 case FLextern: 1441 if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0) 1442 { 1443 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 1444 { 1445 assert(0); 1446 } 1447 else static if (TARGET_WINDOS) 1448 { 1449 if (I64) 1450 { // GS:[88] 1451 pcs.Irm = modregrm(0, 0, 4); 1452 pcs.Isib = modregrm(0, 4, 5); // don't use [RIP] addressing 1453 pcs.IFL1 = FLconst; 1454 pcs.IEV1.Vuns = 88; 1455 pcs.Iflags = CFgs; 1456 pcs.Irex |= REX_W; 1457 break; 1458 } 1459 else 1460 { 1461 pcs.Iflags |= CFfs; // add FS: override 1462 } 1463 } 1464 } 1465 if (s.ty() & mTYcs && cast(bool) LARGECODE) 1466 goto Lfardata; 1467 goto L3; 1468 1469 case FLdata: 1470 case FLudata: 1471 case FLcsdata: 1472 case FLgot: 1473 case FLgotoff: 1474 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || 
TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 1475 { 1476 case FLtlsdata: 1477 } 1478 L3: 1479 pcs.Irm = modregrm(0, 0, BPRM); 1480 L2: 1481 if (fl == FLreg) 1482 { 1483 //printf("test: FLreg, %s %d regcon.mvar = %s\n", 1484 // s.Sident.ptr, cast(int)e.EV.Voffset, regm_str(regcon.mvar)); 1485 if (!(s.Sregm & regcon.mvar)) 1486 symbol_print(s); 1487 assert(s.Sregm & regcon.mvar); 1488 1489 /* Attempting to paint a float as an integer or an integer as a float 1490 * will cause serious problems since the EA is loaded separatedly from 1491 * the opcode. The only way to deal with this is to prevent enregistering 1492 * such variables. 1493 */ 1494 if (tyxmmreg(ty) && !(s.Sregm & XMMREGS) || 1495 !tyxmmreg(ty) && (s.Sregm & XMMREGS)) 1496 cgreg_unregister(s.Sregm); 1497 1498 if ( 1499 s.Sclass == SCregpar || 1500 s.Sclass == SCparameter) 1501 { refparam = true; 1502 reflocal = true; // kludge to set up prolog 1503 } 1504 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7); 1505 if (s.Sreglsw & 8) 1506 pcs.Irex |= REX_B; 1507 if (e.EV.Voffset == REGSIZE && sz == REGSIZE) 1508 { 1509 pcs.Irm = modregrm(3, 0, s.Sregmsw & 7); 1510 if (s.Sregmsw & 8) 1511 pcs.Irex |= REX_B; 1512 else 1513 pcs.Irex &= ~REX_B; 1514 } 1515 else if (e.EV.Voffset == 1 && sz == 1) 1516 { 1517 assert(s.Sregm & BYTEREGS); 1518 assert(s.Sreglsw < 4); 1519 pcs.Irm |= 4; // use 2nd byte of register 1520 } 1521 else 1522 { 1523 assert(!e.EV.Voffset); 1524 if (I64 && sz == 1 && s.Sreglsw >= 4) 1525 pcs.Irex |= REX; 1526 } 1527 } 1528 else if (s.ty() & mTYcs && !(fl == FLextern && LARGECODE)) 1529 { 1530 pcs.Iflags |= CFcs | CFoff; 1531 } 1532 if (config.flags3 & CFG3pic && 1533 (fl == FLtlsdata || s.ty() & mTYthread)) 1534 { 1535 if (I32) 1536 { 1537 if (config.flags3 & CFG3pie) 1538 { 1539 pcs.Iflags |= CFgs; 1540 } 1541 } 1542 else if (I64) 1543 { 1544 if (config.flags3 & CFG3pie && 1545 (s.Sclass == SCglobal || s.Sclass == SCstatic || s.Sclass == SClocstat)) 1546 { 1547 pcs.Iflags |= CFfs; 1548 pcs.Irm 
= modregrm(0, 0, 4); 1549 pcs.Isib = modregrm(0, 4, 5); // don't use [RIP] addressing 1550 } 1551 else 1552 { 1553 pcs.Iflags |= CFopsize; 1554 pcs.Irex = 0x48; 1555 } 1556 } 1557 } 1558 pcs.IEV1.Vsym = s; 1559 pcs.IEV1.Voffset = e.EV.Voffset; 1560 if (sz == 1) 1561 { /* Don't use SI or DI for this variable */ 1562 s.Sflags |= GTbyte; 1563 if (I64 ? e.EV.Voffset > 0 : e.EV.Voffset > 1) 1564 { 1565 debug if (debugr) printf("'%s' not reg cand due to byte offset\n", s.Sident.ptr); 1566 s.Sflags &= ~GTregcand; 1567 } 1568 } 1569 else if (e.EV.Voffset || sz > tysize(s.Stype.Tty)) 1570 { 1571 debug if (debugr) printf("'%s' not reg cand due to offset or size\n", s.Sident.ptr); 1572 s.Sflags &= ~GTregcand; 1573 } 1574 1575 if (config.fpxmmregs && tyfloating(s.ty()) && !tyfloating(ty)) 1576 { 1577 debug if (debugr) printf("'%s' not reg cand due to mix float and int\n", s.Sident.ptr); 1578 // Can't successfully mix XMM register variables accessed as integers 1579 s.Sflags &= ~GTregcand; 1580 } 1581 1582 if (!(keepmsk & RMstore)) // if not store only 1583 s.Sflags |= SFLread; // assume we are doing a read 1584 break; 1585 1586 case FLpseudo: 1587 version (MARS) 1588 { 1589 { 1590 getregs(cdb, mask(s.Sreglsw)); 1591 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7); 1592 if (s.Sreglsw & 8) 1593 pcs.Irex |= REX_B; 1594 if (e.EV.Voffset == 1 && sz == 1) 1595 { assert(s.Sregm & BYTEREGS); 1596 assert(s.Sreglsw < 4); 1597 pcs.Irm |= 4; // use 2nd byte of register 1598 } 1599 else 1600 { assert(!e.EV.Voffset); 1601 if (I64 && sz == 1 && s.Sreglsw >= 4) 1602 pcs.Irex |= REX; 1603 } 1604 break; 1605 } 1606 } 1607 else 1608 { 1609 { 1610 uint u = s.Sreglsw; 1611 getregs(cdb, pseudomask[u]); 1612 pcs.Irm = modregrm(3, 0, pseudoreg[u] & 7); 1613 break; 1614 } 1615 } 1616 1617 case FLfardata: 1618 case FLfunc: /* reading from code seg */ 1619 if (config.exe & EX_flat) 1620 goto L3; 1621 Lfardata: 1622 { 1623 regm_t regm = ALLREGS & ~keepmsk; // need scratch register 1624 allocreg(cdb, ®m, ®, 
TYint); 1625 getregs(cdb,mES); 1626 // MOV mreg,seg of symbol 1627 cdb.gencs(0xB8 + reg, 0, FLextern, s); 1628 cdb.last().Iflags = CFseg; 1629 cdb.gen2(0x8E, modregrmx(3, 0, reg)); // MOV ES,reg 1630 pcs.Iflags |= CFes | CFoff; /* ES segment override */ 1631 goto L3; 1632 } 1633 1634 case FLstack: 1635 assert(!I16); 1636 pcs.Irm = modregrm(2, 0, 4); 1637 pcs.Isib = modregrm(0, 4, SP); 1638 pcs.IEV1.Vsym = s; 1639 pcs.IEV1.Voffset = e.EV.Voffset; 1640 break; 1641 1642 default: 1643 WRFL(cast(FL)fl); 1644 symbol_print(s); 1645 assert(0); 1646 } 1647 }

/*****************************
 * Emit the instruction template `*pcs` once for every general register
 * that makes up a float or double value.
 *
 * Before each emission the register operand in `pcs.Irm` is replaced via
 * NEWREG() and `pcs.IEV1.Voffset` is moved to the matching part of the
 * operand.  On return `pcs.IEV1.Voffset` has its original value again.
 * Not available for 64 bit code.
 *
 * Params:
 *      cdb = sink the generated instructions are appended to
 *      pcs = instruction template (opcode and EA); modified in place
 *      tym = either TYdouble or TYfloat
 */

void fltregs(ref CodeBuilder cdb, code* pcs, tym_t tym)
{
    assert(!I64);
    const bool single = tybasic(tym) == TYfloat;

    if (!I32)
    {
        // 16 bit code: 2 registers for a float, 4 for a double
        getregs(cdb, single ? FLOATREGS_16 : DOUBLEREGS_16);
        if (single)
        {
            // offset 2 -> DX, offset 0 -> AX
            pcs.IEV1.Voffset += 2;
            NEWREG(pcs.Irm, DX);
            cdb.gen(pcs);

            pcs.IEV1.Voffset -= 2;
            NEWREG(pcs.Irm, AX);
            cdb.gen(pcs);
        }
        else
        {
            // offset 6 -> AX, 4 -> BX, 2 -> CX, 0 -> DX
            pcs.IEV1.Voffset += 6;
            NEWREG(pcs.Irm, AX);
            cdb.gen(pcs);

            pcs.IEV1.Voffset -= 2;
            NEWREG(pcs.Irm, BX);
            cdb.gen(pcs);

            pcs.IEV1.Voffset -= 2;
            NEWREG(pcs.Irm, CX);
            cdb.gen(pcs);

            pcs.IEV1.Voffset -= 2;      // back to the offset we were called with
            NEWREG(pcs.Irm, DX);
            cdb.gen(pcs);
        }
        return;
    }

    // 32 bit code: AX for a float, DX:AX for a double
    getregs(cdb, single ? mAX : mAX | mDX);
    if (!single)
    {
        // upper half of the double goes to DX
        pcs.IEV1.Voffset += REGSIZE;
        NEWREG(pcs.Irm, DX);
        cdb.gen(pcs);
        pcs.IEV1.Voffset -= REGSIZE;
    }
    NEWREG(pcs.Irm, AX);
    cdb.gen(pcs);
}


/*****************************
 * Given a result in registers, test it for true or false.
 * Will fail if TYfptr and the reg is ES!
 * If saveflag is true, preserve the contents of the
 * registers.
*/

/* Params:
 *      cdb      = sink the generated instructions are appended to
 *      regm     = mask of the register(s) currently holding the value
 *      tym      = type of the value; only its tybasic() part is used
 *      saveflag = if non-zero, work on a scratch register instead of
 *                 destroying the value
 * Except for the early-return paths (1 byte values, XMM values and values
 * that fit in one register), the last instruction generated is marked
 * with CFpsw.
 */
void tstresult(ref CodeBuilder cdb, regm_t regm, tym_t tym, uint saveflag)
{
    reg_t scrreg;                       // scratch register
    regm_t scrregm;

    //if (!(regm & (mBP | ALLREGS)))
        //printf("tstresult(regm = %s, tym = x%x, saveflag = %d)\n",
            //regm_str(regm),tym,saveflag);

    assert(regm & (XMMREGS | mBP | ALLREGS));
    tym = tybasic(tym);
    reg_t reg = findreg(regm);
    uint sz = _tysize[tym];

    // Byte sized value: a single byte TEST
    if (sz == 1)
    {
        assert(regm & BYTEREGS);
        genregs(cdb, 0x84, reg, reg);           // TEST regL,regL
        if (I64 && reg >= 4)
            code_orrex(cdb.last(), REX);
        return;
    }

    // Value lives in XMM register(s): compare against a zeroed XMM register
    if (regm & XMMREGS)
    {
        reg_t xreg;
        regm_t xregs = XMMREGS & ~regm;
        allocreg(cdb,&xregs, &xreg, TYdouble);
        opcode_t op = 0;
        // 0x66 prefix selects the double precision forms (XORPD / UCOMISD)
        if (tym == TYdouble || tym == TYidouble || tym == TYcdouble)
            op = 0x660000;
        cdb.gen2(op | 0x0F57, modregrm(3, xreg-XMM0, xreg-XMM0));   // XORPS xreg,xreg
        cdb.gen2(op | 0x0F2E, modregrm(3, xreg-XMM0, reg-XMM0));    // UCOMISS xreg,reg
        if (tym == TYcfloat || tym == TYcdouble)
        {   code *cnop = gennop(null);
            // Complex: the second register is only compared when the first
            // compared equal and ordered.
            genjmp(cdb, JNE, FLcode, cast(block *) cnop);   // JNE     L1
            genjmp(cdb, JP, FLcode, cast(block *) cnop);    // JP      L1
            reg = findreg(regm & ~mask(reg));
            cdb.gen2(op | 0x0F2E, modregrm(3, xreg-XMM0, reg-XMM0));    // UCOMISS xreg,reg
            cdb.append(cnop);
        }
        return;
    }

    // Value fits in a single general register
    if (sz <= REGSIZE)
    {
        if (!I16)
        {
            if (tym == TYfloat)
            {
                if (saveflag)
                {
                    scrregm = allregs & ~regm;                  // possible scratch regs
                    allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
                    genmovreg(cdb, scrreg, reg);                // MOV scrreg,msreg
                    reg = scrreg;
                }
                getregs(cdb, mask(reg));
                // NOTE(review): presumably shifts out the sign bit so that
                // -0.0 also tests as zero - confirm
                cdb.gen2(0xD1, modregrmx(3, 4, reg));           // SHL reg,1
                return;
            }
            gentstreg(cdb,reg);                         // TEST reg,reg
            if (sz == SHORTSIZE)
                cdb.last().Iflags |= CFopsize;          // 16 bit operands
            else if (sz == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else
            gentstreg(cdb, reg);                        // TEST reg,reg
        return;
    }

    // Value occupies more than one register: OR the pieces together
    if (saveflag || tyfv(tym))
    {
    L1:
        // Non-destructive variant: combine the pieces in a scratch register
        scrregm = ALLREGS & ~regm;                      // possible scratch regs
        allocreg(cdb, &scrregm, &scrreg, TYoffset);     // allocate scratch reg
        if (I32 || sz == REGSIZE * 2)
        {
            assert(regm & mMSW && regm & mLSW);

            reg = findregmsw(regm);
            if (I32)
            {
                if (tyfv(tym))
                    genregs(cdb, MOVZXw, scrreg, reg);  // MOVZX scrreg,msreg
                else
                {
                    genmovreg(cdb, scrreg, reg);        // MOV scrreg,msreg
                    if (tym == TYdouble || tym == TYdouble_alias)
                        cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
                }
            }
            else
            {
                genmovreg(cdb, scrreg, reg);            // MOV scrreg,msreg
                if (tym == TYfloat)
                    cdb.gen2(0xD1, modregrm(3, 4, scrreg));     // SHL scrreg,1
            }
            reg = findreglsw(regm);
            genorreg(cdb, scrreg, reg);                 // OR scrreg,lsreg
        }
        else if (sz == 8)
        {
            // !I32
            genmovreg(cdb, scrreg, AX);                 // MOV scrreg,AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1 ,modregrm(3, 4, scrreg)); // SHL scrreg,1
            genorreg(cdb, scrreg, BX);                  // OR scrreg,BX
            genorreg(cdb, scrreg, CX);                  // OR scrreg,CX
            genorreg(cdb, scrreg, DX);                  // OR scrreg,DX
        }
        else
            assert(0);
    }
    else
    {
        // Destructive variant: the most significant register is overwritten
        if (I32 || sz == REGSIZE * 2)
        {
            // can't test ES:LSW for 0
            assert(regm & mMSW & ALLREGS && regm & (mLSW | mBP));

            reg = findregmsw(regm);
            if (regcon.mvar & mask(reg))        // if register variable
                goto L1;                        // don't trash it
            getregs(cdb, mask(reg));            // we're going to trash reg
            if (tyfloating(tym) && sz == 2 * _tysize[TYint])
                cdb.gen2(0xD1, modregrm(3 ,4, reg));    // SHL reg,1
            genorreg(cdb, reg, findreglsw(regm));       // OR reg,reg+1
            if (I64)
                code_orrex(cdb.last(), REX_W);
        }
        else if (sz == 8)
        {   assert(regm == DOUBLEREGS_16);
            getregs(cdb,mAX);                   // allocate AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1, modregrm(3, 4, AX));     // SHL AX,1
            genorreg(cdb, AX, BX);              // OR AX,BX
            genorreg(cdb, AX, CX);              // OR AX,CX
            genorreg(cdb, AX, DX);              // OR AX,DX
        }
        else
            assert(0);
    }
    code_orflag(cdb.last(),CFpsw);
}

/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 *
 * Params:
 *      cdb      = sink the generated instructions are appended to
 *      e        = expression that produced the result; supplies the type
 *      retregs  = where the result currently is (must not be 0)
 *      pretregs = in: acceptable destinations, optionally including mPSW
 *                 to request the flags; nothing is generated if it is 0.
 *                 It is set to `retregs` when the result is already
 *                 acceptable, otherwise it is passed to allocreg() /
 *                 fixresult87() to pick the destination.
 */

void fixresult(ref CodeBuilder cdb, elem *e, regm_t retregs, regm_t *pretregs)
{
    //printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n",e,regm_str(retregs),regm_str(*pretregs));
    if (*pretregs == 0) return;                 // if don't want result
    assert(e && retregs);                       // need something to work with
    regm_t forccs = *pretregs & mPSW;
    regm_t forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK | XMMREGS);
    tym_t tym = tybasic(e.Ety);

    // Structs are moved around as an integer or pointer type
    if (tym == TYstruct)
    {
        if (e.Eoper == OPpair || e.Eoper == OPrpair)
        {
            if (I64)
                tym = TYucent;
            else
                tym = TYullong;
        }
        else
            // Hack to support cdstreq()
            tym = (forregs & mMSW) ? TYfptr : TYnptr;
    }
    int sz = _tysize[tym];

    if (sz == 1)
    {
        assert(retregs & BYTEREGS);
        const reg = findreg(retregs);
        // Second byte of a register variable: test the high byte register
        // here, and clear forccs so tstresult() is not run as well.
        if (e.Eoper == OPvar &&
            e.EV.Voffset == 1 &&
            e.EV.Vsym.Sfl == FLreg)
        {
            assert(reg < 4);
            if (forccs)
                cdb.gen2(0x84, modregrm(3, reg | 4, reg | 4));   // TEST regH,regH
            forccs = 0;
        }
    }

    reg_t reg,rreg;
    if ((retregs & forregs) == retregs)         // if already in right registers
        *pretregs = retregs;
    else if (forregs)                           // if return the result in registers
    {
        if ((forregs | retregs) & (mST01 | mST0))
        {
            fixresult87(cdb, e, retregs, pretregs);
            return;
        }
        uint opsflag = false;
        if (I16 && sz == 8)
        {
            // 16 bit doubles only travel between DOUBLEREGS_16 and the stack
            if (forregs & mSTACK)
            {
                assert(retregs == DOUBLEREGS_16);
                // Push floating regs
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);
                stackpush += DOUBLESIZE;
            }
            else if (retregs & mSTACK)
            {
                assert(forregs == DOUBLEREGS_16);
                // Pop floating regs
                getregs(cdb,forregs);
                cdb.gen1(0x58 + DX);
                cdb.gen1(0x58 + CX);
                cdb.gen1(0x58 + BX);
                cdb.gen1(0x58 + AX);
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS_16;        // for tstresult() below
            }
            else
            {
                // Only the diagnostic is debug-only.  The assert must be a
                // statement of its own: chained to the printf with a comma
                // it becomes part of the debug statement and disappears
                // from non-debug builds.
                debug printf("retregs = %s, forregs = %s\n", regm_str(retregs), regm_str(forregs));
                assert(0);
            }
            if (!OTleaf(e.Eoper))
                opsflag = true;
        }
        else
        {
            allocreg(cdb, pretregs, &rreg, tym);        // allocate return regs
            if (retregs & XMMREGS)
            {
                // XMM -> XMM or general register, via the floatreg memory slot
                reg = findreg(retregs & XMMREGS);
                // MOVSD floatreg, XMM?
                cdb.genxmmreg(xmmstore(tym), reg, 0, tym);
                if (mask(rreg) & XMMREGS)
                    // MOVSD XMM?, floatreg
                    cdb.genxmmreg(xmmload(tym), rreg, 0, tym);
                else
                {
                    // MOV rreg,floatreg
                    cdb.genfltreg(0x8B,rreg,0);
                    if (sz == 8)
                    {
                        if (I32)
                        {
                            rreg = findregmsw(*pretregs);
                            cdb.genfltreg(0x8B, rreg,4);
                        }
                        else
                            code_orrex(cdb.last(),REX_W);
                    }
                }
            }
            else if (forregs & XMMREGS)
            {
                // general register(s) -> XMM
                reg = findreg(retregs & (mBP | ALLREGS));
                switch (sz)
                {
                    case 4:
                        cdb.gen2(LODD, modregxrmx(3, rreg - XMM0, reg)); // MOVD xmm,reg
                        break;

                    case 8:
                        if (I32)
                        {
                            cdb.genfltreg(0x89, reg, 0);
                            reg = findregmsw(retregs);
                            cdb.genfltreg(0x89, reg, 4);
                            cdb.genxmmreg(xmmload(tym), rreg, 0, tym); // MOVQ xmm,mem
                        }
                        else
                        {
                            cdb.gen2(LODD /* [sic!] */, modregxrmx(3, rreg - XMM0, reg));
                            code_orrex(cdb.last(), REX_W); // MOVQ xmm,reg
                        }
                        break;

                    default:
                        assert(false);
                }
                checkSetVex(cdb.last(), tym);
            }
            else if (sz > REGSIZE)
            {
                // register pair -> register pair
                uint msreg = findregmsw(retregs);
                uint lsreg = findreglsw(retregs);
                uint msrreg = findregmsw(*pretregs);
                uint lsrreg = findreglsw(*pretregs);

                genmovreg(cdb, msrreg, msreg);  // MOV msrreg,msreg
                genmovreg(cdb, lsrreg, lsreg);  // MOV lsrreg,lsreg
            }
            else
            {
                assert(!(retregs & XMMREGS));
                assert(!(forregs & XMMREGS));
                reg = findreg(retregs & (mBP | ALLREGS));
                if (I64 && sz <= 4)
                    genregs(cdb, 0x89, reg, rreg);  // only move 32 bits, and zero the top 32 bits
                else
                    genmovreg(cdb, rreg, reg);      // MOV rreg,reg
            }
        }
        cssave(e,retregs | *pretregs,opsflag);
        // Commented out due to Bugzilla 8840
        //forregs = 0;    // don't care about result in reg cuz real result is in rreg
        retregs = *pretregs & ~mPSW;
    }
    if (forccs)                                 // if return result in flags
    {
        if (retregs & (mST01 | mST0))
            fixresult87(cdb, e, retregs, pretregs);
        else
            tstresult(cdb, retregs, tym, forregs);
    }
}

/*******************************
 * Extra information about each CLIB runtime library function.
 */

enum
{
    INF32         = 1,      /// if 32 bit only
    INFfloat      = 2,      /// if this is floating point
    INFwkdone     = 4,      /// if weak extern is already done
    INF64         = 8,      /// if 64 bit only
    INFpushebx    = 0x10,   /// push EBX before load_localgot()
    INFpusheabcdx = 0x20,   /// pass EAX/EBX/ECX/EDX on stack, callee does ret 16
}

/// Calling information recorded for one CLIB runtime library function.
struct ClibInfo
{
    regm_t retregs16;   /* registers that 16 bit result is returned in  */
    regm_t retregs32;   /* registers that 32 bit result is returned in  */
    ubyte pop;          // # of bytes popped off of stack upon return
    ubyte flags;        /// INFxxx
    byte push87;        // # of pushes onto the 8087 stack
    byte pop87;         // # of pops off of the 8087 stack
}

__gshared int clib_inited = false;          // true if initialized

/*******************************
 * Create the Symbol for a runtime library function.
 * Params:
 *      name    = identifier of the function
 *      desregs = registers not preserved by the function; every register of
 *                mBP | mES | ALLREGS outside this mask is recorded in
 *                Sregsaved
 * Returns:
 *      the new Symbol: type tsclib, class SCextern, Sfl FLfunc
 */
Symbol* symboly(const(char)* name, regm_t desregs)
{
    Symbol *s = symbol_calloc(name);
    s.Stype = tsclib;
    s.Sclass = SCextern;
    s.Sfl = FLfunc;
    s.Ssymnum = 0;
    s.Sregsaved = ~desregs & (mBP | mES | ALLREGS);
    return s;
}

2068 void getClibInfo(uint clib, Symbol** ps, ClibInfo** pinfo) 2069 { 2070 __gshared Symbol*[CLIB.MAX] clibsyms; 2071 __gshared ClibInfo[CLIB.MAX] clibinfo; 2072 2073 if (!clib_inited) 2074 { 2075 for (size_t i = 0; i < CLIB.MAX; ++i) 2076 { 2077 Symbol* s = clibsyms[i]; 2078 if (s) 2079 { 2080 s.Sxtrnnum = 0; 2081 s.Stypidx = 0; 2082 clibinfo[i].flags &= ~INFwkdone; 2083 } 2084 } 2085 clib_inited = true; 2086 } 2087 2088 const uint ex_unix = (EX_LINUX | EX_LINUX64 | 2089 EX_OSX | EX_OSX64 | 2090 EX_FREEBSD | EX_FREEBSD64 | 2091 EX_OPENBSD | EX_OPENBSD64 | 2092 EX_DRAGONFLYBSD64 | 2093 EX_SOLARIS | EX_SOLARIS64); 2094 2095 ClibInfo* cinfo = &clibinfo[clib]; 2096 Symbol* s
= clibsyms[clib]; 2097 if (!s) 2098 { 2099 2100 switch (clib) 2101 { 2102 case CLIB.lcmp: 2103 { 2104 const(char)* name = (config.exe & ex_unix) ? "__LCMP__" : "_LCMP@"; 2105 s = symboly(name, 0); 2106 } 2107 break; 2108 2109 case CLIB.lmul: 2110 { 2111 const(char)* name = (config.exe & ex_unix) ? "__LMUL__" : "_LMUL@"; 2112 s = symboly(name, mAX|mCX|mDX); 2113 cinfo.retregs16 = mDX|mAX; 2114 cinfo.retregs32 = mDX|mAX; 2115 } 2116 break; 2117 2118 case CLIB.ldiv: 2119 cinfo.retregs16 = mDX|mAX; 2120 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2121 { 2122 s = symboly("__divdi3", mAX|mBX|mCX|mDX); 2123 cinfo.flags = INFpushebx; 2124 cinfo.retregs32 = mDX|mAX; 2125 } 2126 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2127 { 2128 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2129 cinfo.flags = INFpushebx; 2130 cinfo.retregs32 = mDX|mAX; 2131 } 2132 else if (I32 && config.objfmt == OBJ_MSCOFF) 2133 { 2134 s = symboly("_alldiv", mAX|mBX|mCX|mDX); 2135 cinfo.flags = INFpusheabcdx; 2136 cinfo.retregs32 = mDX|mAX; 2137 } 2138 else 2139 { 2140 const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@"; 2141 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2142 cinfo.retregs32 = mDX|mAX; 2143 } 2144 break; 2145 2146 case CLIB.lmod: 2147 cinfo.retregs16 = mCX|mBX; 2148 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2149 { 2150 s = symboly("__moddi3", mAX|mBX|mCX|mDX); 2151 cinfo.flags = INFpushebx; 2152 cinfo.retregs32 = mDX|mAX; 2153 } 2154 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2155 { 2156 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2157 cinfo.flags = INFpushebx; 2158 cinfo.retregs32 = mCX|mBX; 2159 } 2160 else if (I32 && config.objfmt == OBJ_MSCOFF) 2161 { 2162 s = symboly("_allrem", mAX|mBX|mCX|mDX); 2163 cinfo.flags = INFpusheabcdx; 2164 cinfo.retregs32 = mAX|mDX; 2165 } 2166 else 2167 { 2168 const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@"; 2169 s = symboly(name, (config.exe & ex_unix) ? 
mAX|mBX|mCX|mDX : ALLREGS); 2170 cinfo.retregs32 = mCX|mBX; 2171 } 2172 break; 2173 2174 case CLIB.uldiv: 2175 cinfo.retregs16 = mDX|mAX; 2176 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2177 { 2178 s = symboly("__udivdi3", mAX|mBX|mCX|mDX); 2179 cinfo.flags = INFpushebx; 2180 cinfo.retregs32 = mDX|mAX; 2181 } 2182 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2183 { 2184 s = symboly("__ULDIV2__", mAX|mBX|mCX|mDX); 2185 cinfo.flags = INFpushebx; 2186 cinfo.retregs32 = mDX|mAX; 2187 } 2188 else if (I32 && config.objfmt == OBJ_MSCOFF) 2189 { 2190 s = symboly("_aulldiv", mAX|mBX|mCX|mDX); 2191 cinfo.flags = INFpusheabcdx; 2192 cinfo.retregs32 = mDX|mAX; 2193 } 2194 else 2195 { 2196 const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@"; 2197 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2198 cinfo.retregs32 = mDX|mAX; 2199 } 2200 break; 2201 2202 case CLIB.ulmod: 2203 cinfo.retregs16 = mCX|mBX; 2204 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2205 { 2206 s = symboly("__umoddi3", mAX|mBX|mCX|mDX); 2207 cinfo.flags = INFpushebx; 2208 cinfo.retregs32 = mDX|mAX; 2209 } 2210 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2211 { 2212 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2213 cinfo.flags = INFpushebx; 2214 cinfo.retregs32 = mCX|mBX; 2215 } 2216 else if (I32 && config.objfmt == OBJ_MSCOFF) 2217 { 2218 s = symboly("_aullrem", mAX|mBX|mCX|mDX); 2219 cinfo.flags = INFpusheabcdx; 2220 cinfo.retregs32 = mAX|mDX; 2221 } 2222 else 2223 { 2224 const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@"; 2225 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2226 cinfo.retregs32 = mCX|mBX; 2227 } 2228 break; 2229 2230 // This section is only for Windows and DOS (i.e. 
machines without the x87 FPU) 2231 case CLIB.dmul: 2232 s = symboly("_DMUL@",mAX|mBX|mCX|mDX); 2233 cinfo.retregs16 = DOUBLEREGS_16; 2234 cinfo.retregs32 = DOUBLEREGS_32; 2235 cinfo.pop = 8; 2236 cinfo.flags = INFfloat; 2237 cinfo.push87 = 1; 2238 cinfo.pop87 = 1; 2239 break; 2240 2241 case CLIB.ddiv: 2242 s = symboly("_DDIV@",mAX|mBX|mCX|mDX); 2243 cinfo.retregs16 = DOUBLEREGS_16; 2244 cinfo.retregs32 = DOUBLEREGS_32; 2245 cinfo.pop = 8; 2246 cinfo.flags = INFfloat; 2247 cinfo.push87 = 1; 2248 cinfo.pop87 = 1; 2249 break; 2250 2251 case CLIB.dtst0: 2252 s = symboly("_DTST0@",0); 2253 cinfo.flags = INFfloat; 2254 break; 2255 2256 case CLIB.dtst0exc: 2257 s = symboly("_DTST0EXC@",0); 2258 cinfo.flags = INFfloat; 2259 break; 2260 2261 case CLIB.dcmp: 2262 s = symboly("_DCMP@",0); 2263 cinfo.pop = 8; 2264 cinfo.flags = INFfloat; 2265 cinfo.push87 = 1; 2266 cinfo.pop87 = 1; 2267 break; 2268 2269 case CLIB.dcmpexc: 2270 s = symboly("_DCMPEXC@",0); 2271 cinfo.pop = 8; 2272 cinfo.flags = INFfloat; 2273 cinfo.push87 = 1; 2274 cinfo.pop87 = 1; 2275 break; 2276 2277 case CLIB.dneg: 2278 s = symboly("_DNEG@",I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2279 cinfo.retregs16 = DOUBLEREGS_16; 2280 cinfo.retregs32 = DOUBLEREGS_32; 2281 cinfo.flags = INFfloat; 2282 break; 2283 2284 case CLIB.dadd: 2285 s = symboly("_DADD@",mAX|mBX|mCX|mDX); 2286 cinfo.retregs16 = DOUBLEREGS_16; 2287 cinfo.retregs32 = DOUBLEREGS_32; 2288 cinfo.pop = 8; 2289 cinfo.flags = INFfloat; 2290 cinfo.push87 = 1; 2291 cinfo.pop87 = 1; 2292 break; 2293 2294 case CLIB.dsub: 2295 s = symboly("_DSUB@",mAX|mBX|mCX|mDX); 2296 cinfo.retregs16 = DOUBLEREGS_16; 2297 cinfo.retregs32 = DOUBLEREGS_32; 2298 cinfo.pop = 8; 2299 cinfo.flags = INFfloat; 2300 cinfo.push87 = 1; 2301 cinfo.pop87 = 1; 2302 break; 2303 2304 case CLIB.fmul: 2305 s = symboly("_FMUL@",mAX|mBX|mCX|mDX); 2306 cinfo.retregs16 = FLOATREGS_16; 2307 cinfo.retregs32 = FLOATREGS_32; 2308 cinfo.flags = INFfloat; 2309 cinfo.push87 = 1; 2310 cinfo.pop87 = 1; 2311 break; 2312 2313 case CLIB.fdiv: 2314 s = symboly("_FDIV@",mAX|mBX|mCX|mDX); 2315 cinfo.retregs16 = FLOATREGS_16; 2316 cinfo.retregs32 = FLOATREGS_32; 2317 cinfo.flags = INFfloat; 2318 cinfo.push87 = 1; 2319 cinfo.pop87 = 1; 2320 break; 2321 2322 case CLIB.ftst0: 2323 s = symboly("_FTST0@",0); 2324 cinfo.flags = INFfloat; 2325 break; 2326 2327 case CLIB.ftst0exc: 2328 s = symboly("_FTST0EXC@",0); 2329 cinfo.flags = INFfloat; 2330 break; 2331 2332 case CLIB.fcmp: 2333 s = symboly("_FCMP@",0); 2334 cinfo.flags = INFfloat; 2335 cinfo.push87 = 1; 2336 cinfo.pop87 = 1; 2337 break; 2338 2339 case CLIB.fcmpexc: 2340 s = symboly("_FCMPEXC@",0); 2341 cinfo.flags = INFfloat; 2342 cinfo.push87 = 1; 2343 cinfo.pop87 = 1; 2344 break; 2345 2346 case CLIB.fneg: 2347 s = symboly("_FNEG@",I16 ? 
FLOATREGS_16 : FLOATREGS_32); 2348 cinfo.retregs16 = FLOATREGS_16; 2349 cinfo.retregs32 = FLOATREGS_32; 2350 cinfo.flags = INFfloat; 2351 break; 2352 2353 case CLIB.fadd: 2354 s = symboly("_FADD@",mAX|mBX|mCX|mDX); 2355 cinfo.retregs16 = FLOATREGS_16; 2356 cinfo.retregs32 = FLOATREGS_32; 2357 cinfo.flags = INFfloat; 2358 cinfo.push87 = 1; 2359 cinfo.pop87 = 1; 2360 break; 2361 2362 case CLIB.fsub: 2363 s = symboly("_FSUB@",mAX|mBX|mCX|mDX); 2364 cinfo.retregs16 = FLOATREGS_16; 2365 cinfo.retregs32 = FLOATREGS_32; 2366 cinfo.flags = INFfloat; 2367 cinfo.push87 = 1; 2368 cinfo.pop87 = 1; 2369 break; 2370 2371 case CLIB.dbllng: 2372 { 2373 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG" : "_DBLLNG@"; 2374 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2375 cinfo.retregs16 = mDX | mAX; 2376 cinfo.retregs32 = mAX; 2377 cinfo.flags = INFfloat; 2378 cinfo.push87 = 1; 2379 cinfo.pop87 = 1; 2380 break; 2381 } 2382 2383 case CLIB.lngdbl: 2384 { 2385 const(char)* name = (config.exe & ex_unix) ? "__LNGDBL" : "_LNGDBL@"; 2386 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2387 cinfo.retregs16 = DOUBLEREGS_16; 2388 cinfo.retregs32 = DOUBLEREGS_32; 2389 cinfo.flags = INFfloat; 2390 cinfo.push87 = 1; 2391 cinfo.pop87 = 1; 2392 break; 2393 } 2394 2395 case CLIB.dblint: 2396 { 2397 const(char)* name = (config.exe & ex_unix) ? "__DBLINT" : "_DBLINT@"; 2398 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2399 cinfo.retregs16 = mAX; 2400 cinfo.retregs32 = mAX; 2401 cinfo.flags = INFfloat; 2402 cinfo.push87 = 1; 2403 cinfo.pop87 = 1; 2404 break; 2405 } 2406 2407 case CLIB.intdbl: 2408 { 2409 const(char)* name = (config.exe & ex_unix) ? "__INTDBL" : "_INTDBL@"; 2410 s = symboly(name, I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2411 cinfo.retregs16 = DOUBLEREGS_16; 2412 cinfo.retregs32 = DOUBLEREGS_32; 2413 cinfo.flags = INFfloat; 2414 cinfo.push87 = 1; 2415 cinfo.pop87 = 1; 2416 break; 2417 } 2418 2419 case CLIB.dbluns: 2420 { 2421 const(char)* name = (config.exe & ex_unix) ? "__DBLUNS" : "_DBLUNS@"; 2422 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2423 cinfo.retregs16 = mAX; 2424 cinfo.retregs32 = mAX; 2425 cinfo.flags = INFfloat; 2426 cinfo.push87 = 1; 2427 cinfo.pop87 = 1; 2428 break; 2429 } 2430 2431 case CLIB.unsdbl: 2432 // Y(DOUBLEREGS_32,"__UNSDBL"), // CLIB.unsdbl 2433 // Y(DOUBLEREGS_16,"_UNSDBL@"), 2434 // {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _UNSDBL@ unsdbl 2435 { 2436 const(char)* name = (config.exe & ex_unix) ? "__UNSDBL" : "_UNSDBL@"; 2437 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2438 cinfo.retregs16 = DOUBLEREGS_16; 2439 cinfo.retregs32 = DOUBLEREGS_32; 2440 cinfo.flags = INFfloat; 2441 cinfo.push87 = 1; 2442 cinfo.pop87 = 1; 2443 break; 2444 } 2445 2446 case CLIB.dblulng: 2447 { 2448 const(char)* name = (config.exe & ex_unix) ? "__DBLULNG" : "_DBLULNG@"; 2449 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2450 cinfo.retregs16 = mDX|mAX; 2451 cinfo.retregs32 = mAX; 2452 cinfo.flags = (config.exe & ex_unix) ? INFfloat | INF32 : INFfloat; 2453 cinfo.push87 = (config.exe & ex_unix) ? 0 : 1; 2454 cinfo.pop87 = 1; 2455 break; 2456 } 2457 2458 case CLIB.ulngdbl: 2459 { 2460 const(char)* name = (config.exe & ex_unix) ? "__ULNGDBL@" : "_ULNGDBL@"; 2461 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2462 cinfo.retregs16 = DOUBLEREGS_16; 2463 cinfo.retregs32 = DOUBLEREGS_32; 2464 cinfo.flags = INFfloat; 2465 cinfo.push87 = 1; 2466 cinfo.pop87 = 1; 2467 break; 2468 } 2469 2470 case CLIB.dblflt: 2471 { 2472 const(char)* name = (config.exe & ex_unix) ? "__DBLFLT" : "_DBLFLT@"; 2473 s = symboly(name, I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2474 cinfo.retregs16 = FLOATREGS_16; 2475 cinfo.retregs32 = FLOATREGS_32; 2476 cinfo.flags = INFfloat; 2477 cinfo.push87 = 1; 2478 cinfo.pop87 = 1; 2479 break; 2480 } 2481 2482 case CLIB.fltdbl: 2483 { 2484 const(char)* name = (config.exe & ex_unix) ? "__FLTDBL" : "_FLTDBL@"; 2485 s = symboly(name, I16 ? ALLREGS : DOUBLEREGS_32); 2486 cinfo.retregs16 = DOUBLEREGS_16; 2487 cinfo.retregs32 = DOUBLEREGS_32; 2488 cinfo.flags = INFfloat; 2489 cinfo.push87 = 1; 2490 cinfo.pop87 = 1; 2491 break; 2492 } 2493 2494 case CLIB.dblllng: 2495 { 2496 const(char)* name = (config.exe & ex_unix) ? "__DBLLLNG" : "_DBLLLNG@"; 2497 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2498 cinfo.retregs16 = DOUBLEREGS_16; 2499 cinfo.retregs32 = mDX|mAX; 2500 cinfo.flags = INFfloat; 2501 cinfo.push87 = 1; 2502 cinfo.pop87 = 1; 2503 break; 2504 } 2505 2506 case CLIB.llngdbl: 2507 { 2508 const(char)* name = (config.exe & ex_unix) ? "__LLNGDBL" : "_LLNGDBL@"; 2509 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2510 cinfo.retregs16 = DOUBLEREGS_16; 2511 cinfo.retregs32 = DOUBLEREGS_32; 2512 cinfo.flags = INFfloat; 2513 cinfo.push87 = 1; 2514 cinfo.pop87 = 1; 2515 break; 2516 } 2517 2518 case CLIB.dblullng: 2519 { 2520 if (config.exe == EX_WIN64) 2521 { 2522 s = symboly("__DBLULLNG", DOUBLEREGS_32); 2523 cinfo.retregs32 = mAX; 2524 cinfo.flags = INFfloat; 2525 cinfo.push87 = 2; 2526 cinfo.pop87 = 2; 2527 } 2528 else 2529 { 2530 const(char)* name = (config.exe & ex_unix) ? "__DBLULLNG" : "_DBLULLNG@"; 2531 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2532 cinfo.retregs16 = DOUBLEREGS_16; 2533 cinfo.retregs32 = I64 ? mAX : mDX|mAX; 2534 cinfo.flags = INFfloat; 2535 cinfo.push87 = (config.exe & ex_unix) ? 2 : 1; 2536 cinfo.pop87 = (config.exe & ex_unix) ? 
2 : 1; 2537 } 2538 break; 2539 } 2540 2541 case CLIB.ullngdbl: 2542 { 2543 if (config.exe == EX_WIN64) 2544 { 2545 s = symboly("__ULLNGDBL", DOUBLEREGS_32); 2546 cinfo.retregs32 = mAX; 2547 cinfo.flags = INFfloat; 2548 cinfo.push87 = 1; 2549 cinfo.pop87 = 1; 2550 } 2551 else 2552 { 2553 const(char)* name = (config.exe & ex_unix) ? "__ULLNGDBL" : "_ULLNGDBL@"; 2554 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2555 cinfo.retregs16 = DOUBLEREGS_16; 2556 cinfo.retregs32 = I64 ? mAX : DOUBLEREGS_32; 2557 cinfo.flags = INFfloat; 2558 cinfo.push87 = 1; 2559 cinfo.pop87 = 1; 2560 } 2561 break; 2562 } 2563 2564 case CLIB.dtst: 2565 { 2566 const(char)* name = (config.exe & ex_unix) ? "__DTST" : "_DTST@"; 2567 s = symboly(name, 0); 2568 cinfo.flags = INFfloat; 2569 break; 2570 } 2571 2572 case CLIB.vptrfptr: 2573 { 2574 const(char)* name = (config.exe & ex_unix) ? "__HTOFPTR" : "_HTOFPTR@"; 2575 s = symboly(name, mES|mBX); 2576 cinfo.retregs16 = mES|mBX; 2577 cinfo.retregs32 = mES|mBX; 2578 break; 2579 } 2580 2581 case CLIB.cvptrfptr: 2582 { 2583 const(char)* name = (config.exe & ex_unix) ? "__HCTOFPTR" : "_HCTOFPTR@"; 2584 s = symboly(name, mES|mBX); 2585 cinfo.retregs16 = mES|mBX; 2586 cinfo.retregs32 = mES|mBX; 2587 break; 2588 } 2589 2590 case CLIB._87topsw: 2591 { 2592 const(char)* name = (config.exe & ex_unix) ? "__87TOPSW" : "_87TOPSW@"; 2593 s = symboly(name, 0); 2594 cinfo.flags = INFfloat; 2595 break; 2596 } 2597 2598 case CLIB.fltto87: 2599 { 2600 const(char)* name = (config.exe & ex_unix) ? "__FLTTO87" : "_FLTTO87@"; 2601 s = symboly(name, mST0); 2602 cinfo.retregs16 = mST0; 2603 cinfo.retregs32 = mST0; 2604 cinfo.flags = INFfloat; 2605 cinfo.push87 = 1; 2606 break; 2607 } 2608 2609 case CLIB.dblto87: 2610 { 2611 const(char)* name = (config.exe & ex_unix) ? 
"__DBLTO87" : "_DBLTO87@"; 2612 s = symboly(name, mST0); 2613 cinfo.retregs16 = mST0; 2614 cinfo.retregs32 = mST0; 2615 cinfo.flags = INFfloat; 2616 cinfo.push87 = 1; 2617 break; 2618 } 2619 2620 case CLIB.dblint87: 2621 { 2622 const(char)* name = (config.exe & ex_unix) ? "__DBLINT87" : "_DBLINT87@"; 2623 s = symboly(name, mST0|mAX); 2624 cinfo.retregs16 = mAX; 2625 cinfo.retregs32 = mAX; 2626 cinfo.flags = INFfloat; 2627 break; 2628 } 2629 2630 case CLIB.dbllng87: 2631 { 2632 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG87" : "_DBLLNG87@"; 2633 s = symboly(name, mST0|mAX|mDX); 2634 cinfo.retregs16 = mDX|mAX; 2635 cinfo.retregs32 = mAX; 2636 cinfo.flags = INFfloat; 2637 break; 2638 } 2639 2640 case CLIB.ftst: 2641 { 2642 const(char)* name = (config.exe & ex_unix) ? "__FTST" : "_FTST@"; 2643 s = symboly(name, 0); 2644 cinfo.flags = INFfloat; 2645 break; 2646 } 2647 2648 case CLIB.fcompp: 2649 { 2650 const(char)* name = (config.exe & ex_unix) ? "__FCOMPP" : "_FCOMPP@"; 2651 s = symboly(name, 0); 2652 cinfo.retregs16 = mPSW; 2653 cinfo.retregs32 = mPSW; 2654 cinfo.flags = INFfloat; 2655 cinfo.pop87 = 2; 2656 break; 2657 } 2658 2659 case CLIB.ftest: 2660 { 2661 const(char)* name = (config.exe & ex_unix) ? "__FTEST" : "_FTEST@"; 2662 s = symboly(name, 0); 2663 cinfo.retregs16 = mPSW; 2664 cinfo.retregs32 = mPSW; 2665 cinfo.flags = INFfloat; 2666 break; 2667 } 2668 2669 case CLIB.ftest0: 2670 { 2671 const(char)* name = (config.exe & ex_unix) ? "__FTEST0" : "_FTEST0@"; 2672 s = symboly(name, 0); 2673 cinfo.retregs16 = mPSW; 2674 cinfo.retregs32 = mPSW; 2675 cinfo.flags = INFfloat; 2676 break; 2677 } 2678 2679 case CLIB.fdiv87: 2680 { 2681 const(char)* name = (config.exe & ex_unix) ? 
"__FDIVP" : "_FDIVP"; 2682 s = symboly(name, mST0|mAX|mBX|mCX|mDX); 2683 cinfo.retregs16 = mST0; 2684 cinfo.retregs32 = mST0; 2685 cinfo.flags = INFfloat; 2686 cinfo.push87 = 1; 2687 cinfo.pop87 = 1; 2688 break; 2689 } 2690 2691 // Complex numbers 2692 case CLIB.cmul: 2693 { 2694 s = symboly("_Cmul", mST0|mST01); 2695 cinfo.retregs16 = mST01; 2696 cinfo.retregs32 = mST01; 2697 cinfo.flags = INF32|INFfloat; 2698 cinfo.push87 = 3; 2699 cinfo.pop87 = 5; 2700 break; 2701 } 2702 2703 case CLIB.cdiv: 2704 { 2705 s = symboly("_Cdiv", mAX|mCX|mDX|mST0|mST01); 2706 cinfo.retregs16 = mST01; 2707 cinfo.retregs32 = mST01; 2708 cinfo.flags = INF32|INFfloat; 2709 cinfo.push87 = 0; 2710 cinfo.pop87 = 2; 2711 break; 2712 } 2713 2714 case CLIB.ccmp: 2715 { 2716 s = symboly("_Ccmp", mAX|mST0|mST01); 2717 cinfo.retregs16 = mPSW; 2718 cinfo.retregs32 = mPSW; 2719 cinfo.flags = INF32|INFfloat; 2720 cinfo.push87 = 0; 2721 cinfo.pop87 = 4; 2722 break; 2723 } 2724 2725 case CLIB.u64_ldbl: 2726 { 2727 const(char)* name = (config.exe & ex_unix) ? "__U64_LDBL" : "_U64_LDBL"; 2728 s = symboly(name, mST0); 2729 cinfo.retregs16 = mST0; 2730 cinfo.retregs32 = mST0; 2731 cinfo.flags = INF32|INF64|INFfloat; 2732 cinfo.push87 = 2; 2733 cinfo.pop87 = 1; 2734 break; 2735 } 2736 2737 case CLIB.ld_u64: 2738 { 2739 const(char)* name = (config.exe & ex_unix) ? (config.objfmt == OBJ_ELF || 2740 config.objfmt == OBJ_MACH ? 2741 "__LDBLULLNG" : "___LDBLULLNG") 2742 : "__LDBLULLNG"; 2743 s = symboly(name, mST0|mAX|mDX); 2744 cinfo.retregs16 = 0; 2745 cinfo.retregs32 = mDX|mAX; 2746 cinfo.flags = INF32|INF64|INFfloat; 2747 cinfo.push87 = 1; 2748 cinfo.pop87 = 2; 2749 break; 2750 } 2751 2752 default: 2753 assert(0); 2754 } 2755 clibsyms[clib] = s; 2756 } 2757 2758 *ps = s; 2759 *pinfo = cinfo; 2760 } 2761 2762 /******************************** 2763 * Generate code sequence to call C runtime library support routine. 2764 * clib = CLIB.xxxx 2765 * keepmask = mask of registers not to destroy. 
Currently can handle only 1. Should use a temporary rather than
 *              push/pop for speed.
 * Params:
 *      cdb      = code sink the call sequence is appended to
 *      e        = elem the result belongs to, handed on to fixresult()
 *      clib     = CLIB.xxxx index of the runtime routine
 *      pretregs = where the caller wants the result, handed on to fixresult()
 *      keepmask = mask of registers not to destroy
 */

void callclib(ref CodeBuilder cdb, elem* e, uint clib, regm_t* pretregs, regm_t keepmask)
{
    //printf("callclib(e = %p, clib = %d, *pretregs = %s, keepmask = %s\n", e, clib, regm_str(*pretregs), regm_str(keepmask));
    //elem_print(e);

    // Opcode 0x50 + r is PUSH r. Order in which ECX, EBX, EDX, EAX get pushed.
    static immutable ubyte[4] pushOrder = [ CX, BX, DX, AX ];

    Symbol* s;
    ClibInfo* cinfo;
    getClibInfo(clib, &s, &cinfo);

    if (I16)
        assert(!(cinfo.flags & (INF32 | INF64)));

    // Registers the routine destroys, minus the ones we save/restore ourselves
    const regm_t destroyed = ~s.Sregsaved & (mES | mBP | ALLREGS);
    getregs(cdb, destroyed & ~keepmask);

    // Only registers the routine does not preserve need a save/restore pair
    keepmask &= ~s.Sregsaved;
    const int numSaved = numbitsset(keepmask);
    CodeBuilder cdbpop;
    cdbpop.ctor();
    gensaverestore(keepmask, cdb, cdbpop);

    // Account for the routine's effect on the 8087 stack
    save87regs(cdb, cinfo.push87);
    foreach (i; 0 .. cinfo.push87)
        push87(cdb);

    foreach (i; 0 .. cinfo.pop87)
        pop87();

    const bool inlineLmul = config.target_cpu >= TARGET_80386 && clib == CLIB.lmul && !I32;
    if (inlineLmul)
    {
        // 32 bit multiply sequence emitted inline in place of the call
        static immutable ubyte[23] lmul =
        [
            0x66,0xc1,0xe1,0x10,        // shl  ECX,16
            0x8b,0xcb,                  // mov  CX,BX           ;ECX = CX,BX
            0x66,0xc1,0xe0,0x10,        // shl  EAX,16
            0x66,0x0f,0xac,0xd0,0x10,   // shrd EAX,EDX,16      ;EAX = DX,AX
            0x66,0xf7,0xe1,             // mul  ECX
            0x66,0x0f,0xa4,0xc2,0x10,   // shld EDX,EAX,16      ;DX,AX = EAX
        ];

        cdb.genasm(cast(char*)lmul.ptr, lmul.sizeof);
    }
    else
    {
        makeitextern(s);

        const int pushesEbx = (cinfo.flags & INFpushebx) ? 1 : 0;
        const int pushesAll = (cinfo.flags & INFpusheabcdx) ? 1 : 0;
        int stackAdjust = 0;            // bytes to take back off the stack after the call

        if (STACKALIGN >= 16)
        {
            // Align the stack (assume no args on stack)
            const int pending = (numSaved + pushesEbx + 4 * pushesAll) * REGSIZE + stackpush;
            const misalign = pending & (STACKALIGN - 1);
            if (misalign)
            {
                stackAdjust = STACKALIGN - misalign;
                cod3_stackadj(cdb, stackAdjust);
            }
        }

        if (pushesEbx)
        {
            if (config.exe & (EX_LINUX | EX_LINUX64 | EX_FREEBSD | EX_FREEBSD64 | EX_DRAGONFLYBSD64))
            {
                foreach (r; pushOrder)
                    cdb.gen1(0x50 + r);         // PUSH ECX, EBX, EDX, EAX
                stackAdjust += 4 * REGSIZE;
            }
            else
            {
                cdb.gen1(0x50 + BX);            // PUSH EBX
                stackAdjust += REGSIZE;
            }
        }

        if (pushesAll)
        {
            foreach (r; pushOrder)
                cdb.gen1(0x50 + r);             // PUSH ECX, EBX, EDX, EAX
        }

        // Note: not for OSX
        /* Pass EBX on the stack instead, this is because EBX is used
         * for shared library function calls
         */
        if ((config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) &&
            (config.flags3 & CFG3pic))
        {
            load_localgot(cdb);                 // EBX gets set to this value
        }

        cdb.gencs(LARGECODE ? 0x9A : 0xE8,0,FLfunc,s);  // CALL s
        if (stackAdjust)
            cod3_stackadj(cdb, -stackAdjust);
        calledafunc = 1;

        version (SCPP)
        {
            if (I16 &&                                   // bug in Optlink for weak references
                config.flags3 & CFG3wkfloat &&
                (cinfo.flags & (INFfloat | INFwkdone)) == INFfloat)
            {
                cinfo.flags |= INFwkdone;
                makeitextern(getRtlsym(RTLSYM_INTONLY));
                objmod.wkext(s, getRtlsym(RTLSYM_INTONLY));
            }
        }
    }

    if (I16)
        stackpush -= cinfo.pop;

    const regm_t retregs = I16 ? cinfo.retregs16 : cinfo.retregs32;
    cdb.append(cdbpop);                 // restore the registers saved above
    fixresult(cdb, e, retregs, pretregs);
}


/*************************************************
 * Helper function for converting OPparam's into array of Parameters.
*/
struct Parameter
{
    elem* e;            // the parameter expression
    reg_t reg;          // register it is passed in, NOREG if it goes on the stack
    reg_t reg2;         // second register of a pair, else NOREG
    uint numalign;      // alignment bytes added for a stack parameter
}

/*************************************************
 * Flatten a tree of OPparam nodes into an array of Parameters.
 * E1 of each OPparam is visited before E2, and every OPparam node is freed
 * once both of its children have been visited.
 * Params:
 *      e          = root of the parameter tree
 *      parameters = array receiving the leaves; the caller must make it
 *                   large enough to hold all of them
 *      pi         = index of the next free slot in parameters[], advanced
 *                   by one for each leaf stored
 */
void fillParameters(elem* e, Parameter* parameters, int* pi)
{
    if (e.Eoper == OPparam)
    {
        fillParameters(e.EV.E1, parameters, pi);
        fillParameters(e.EV.E2, parameters, pi);
        freenode(e);
    }
    else
    {
        parameters[*pi].e = e;
        (*pi)++;
    }
}

/***********************************
 * Set up the table of parameter registers available for a function type
 * on the current target.
 *
 *  - 16 bit code: no parameter registers
 *  - 32 bit code: AX for TYjfunc, CX for TYmfunc, otherwise none
 *  - Win64:       CX,DX,R8,R9 and XMM0..XMM3
 *  - other 64 bit: DI,SI,DX,CX,R8,R9 and XMM0..XMM7
 *
 * Params:
 *      tyf = type of the function
 * Returns:
 *      FuncParamRegs with tyf, the register lists and their lengths filled in;
 *      all other fields keep their default initializer
 */
FuncParamRegs FuncParamRegs_create(tym_t tyf)
{
    FuncParamRegs result;

    result.tyf = tyf;

    if (I16)
    {
        result.numintegerregs = 0;
        result.numfloatregs = 0;
    }
    else if (I32)
    {
        if (tyf == TYjfunc)
        {
            static immutable ubyte[1] reglist1 = [ AX ];
            result.argregs = &reglist1[0];
            result.numintegerregs = reglist1.length;
        }
        else if (tyf == TYmfunc)
        {
            static immutable ubyte[1] reglist2 = [ CX ];
            result.argregs = &reglist2[0];
            result.numintegerregs = reglist2.length;
        }
        else
            result.numintegerregs = 0;
        result.numfloatregs = 0;
    }
    else if (I64 && config.exe == EX_WIN64)
    {
        static immutable ubyte[4] reglist3 = [ CX,DX,R8,R9 ];
        result.argregs = &reglist3[0];
        result.numintegerregs = reglist3.length;

        static immutable ubyte[4] freglist3 = [ XMM0, XMM1, XMM2, XMM3 ];
        result.floatregs = &freglist3[0];
        result.numfloatregs = freglist3.length;
    }
    else if (I64)
    {
        static immutable ubyte[6] reglist4 = [ DI,SI,DX,CX,R8,R9 ];
        result.argregs = &reglist4[0];
        result.numintegerregs = reglist4.length;

        static immutable ubyte[8] freglist4 = [ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 ];
        result.floatregs = &freglist4[0];
        result.numfloatregs = freglist4.length;
    }
    else
        assert(0);
    return result;
}

/*****************************************
 * Determine if a parameter of type ty can be passed in a general purpose
 * register.
 * Params:
 *      t = type, valid only if ty is TYstruct or TYarray
 *      ty = type code of the parameter
 * Returns:
 *      false   floating point types, and anything larger than a pointer;
 *              for structs and arrays outside Win64 also any size other
 *              than 1, 2, 4 or 8
 *      true    otherwise
 */
private bool type_jparam2(type* t, tym_t ty)
{
    ty = tybasic(ty);

    if (tyfloating(ty))
        return false;
    else if (ty == TYstruct || ty == TYarray)
    {
        type_debug(t);
        targ_size_t sz = type_size(t);
        return (sz <= _tysize[TYnptr]) &&
               (config.exe == EX_WIN64 || sz == 1 || sz == 2 || sz == 4 || sz == 8);
    }
    else if (tysize(ty) <= _tysize[TYnptr])
        return true;
    return false;
}

/*****************************************
 * Allocate parameter of type t and ty to registers *preg1 and *preg2.
 * Params:
 *      fpr = register allocation state for the call; its counters are
 *            advanced for every register handed out
 *      t = type, valid only if ty is TYstruct or TYarray
 *      ty = type code of the parameter
 *      preg1 = set to the (first) allocated register, NOREG if none
 *      preg2 = set to the second register of a pair, NOREG if none
 * Returns:
 *      0       not allocated to any register
 *      1       *preg1, *preg2 set to allocated register pair
 */
int FuncParamRegs_alloc(ref FuncParamRegs fpr, type* t, tym_t ty, reg_t* preg1, reg_t* preg2)
{
    //printf("FuncParamRegs::alloc(ty: TY%sm t: %p)\n", tystring[tybasic(ty)], t);
    //if (t) type_print(t);

    *preg1 = NOREG;
    *preg2 = NOREG;

    type* t2 = null;
    tym_t ty2 = TYMAX;          // TYMAX means there is no second half

    // SROA with mixed registers
    if (ty & mTYxmmgpr)
    {
        ty = TYdouble;
        ty2 = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        ty = TYllong;
        ty2 = TYdouble;
    }

    // Treat array of 1 the same as its element type
    // (Don't put volatile parameters in registers)
    if (tybasic(ty) == TYarray && tybasic(t.Tty) == TYarray && t.Tdim == 1 && !(t.Tty & mTYvolatile)
        && type_size(t.Tnext) > 1)
    {
        t = t.Tnext;
        ty = t.Tty;
    }

    if (tybasic(ty) == TYstruct && type_zeroSize(t, fpr.tyf))
        return 0;               // don't allocate into registers

    ++fpr.i;

    // If struct or array
    if (tyaggregate(ty))
    {
        assert(t);
        if (config.exe == EX_WIN64)
        {
            /* Structs occupy a general purpose register, regardless of the struct
             * size or the number & types of its fields.
             */
            t = null;
            ty = TYnptr;
        }
        else
        {
            // Substitute the replacement type(s) the aggregate is passed as
            type* targ1, targ2;
            if (tybasic(t.Tty) == TYstruct)
            {
                targ1 = t.Ttag.Sstruct.Sarg1type;
                targ2 = t.Ttag.Sstruct.Sarg2type;
            }
            else if (tybasic(t.Tty) == TYarray)
            {
                if (I64)
                    argtypes(t, targ1, targ2);
            }
            else
                assert(0);

            if (targ1)
            {
                t = targ1;
                ty = t.Tty;
                if (targ2)
                {
                    t2 = targ2;
                    ty2 = t2.Tty;
                }
            }
            else if (I64 && !targ2)
                return 0;
        }
    }

    reg_t* preg = preg1;
    // Remember the counters so a half-done pair allocation can be undone
    int regcntsave = fpr.regcnt;
    int xmmcntsave = fpr.xmmcnt;

    if (config.exe == EX_WIN64)
    {
        if (tybasic(ty) == TYcfloat)
        {
            ty = TYnptr;                // treat like a struct
        }
    }
    else if (I64)
    {
        if ((tybasic(ty) == TYcent || tybasic(ty) == TYucent) &&
            fpr.numintegerregs - fpr.regcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.argregs[fpr.regcnt];
            *preg2 = fpr.argregs[fpr.regcnt + 1];
            fpr.regcnt += 2;
            return 1;
        }

        if (tybasic(ty) == TYcdouble &&
            fpr.numfloatregs - fpr.xmmcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.floatregs[fpr.xmmcnt];
            *preg2 = fpr.floatregs[fpr.xmmcnt + 1];
            fpr.xmmcnt += 2;
            return 1;
        }

        if (tybasic(ty) == TYcfloat
            && fpr.numfloatregs - fpr.xmmcnt >= 1)
        {
            // Allocate XMM register
            *preg1 = fpr.floatregs[fpr.xmmcnt++];
            return 1;
        }
    }

    // First pass allocates (t, ty) to *preg1, second pass (t2, ty2) to *preg2
    foreach (j; 0 .. 2)
    {
        if (fpr.regcnt < fpr.numintegerregs)
        {
            if ((I64 || (fpr.i == 1 && (fpr.tyf == TYjfunc || fpr.tyf == TYmfunc))) &&
                type_jparam2(t, ty))
            {
                *preg = fpr.argregs[fpr.regcnt];
                ++fpr.regcnt;
                if (config.exe == EX_WIN64)
                    ++fpr.xmmcnt;       // Win64 advances both counters together
                goto Lnext;
            }
        }
        if (fpr.xmmcnt < fpr.numfloatregs)
        {
            if (tyxmmreg(ty))
            {
                *preg = fpr.floatregs[fpr.xmmcnt];
                if (config.exe == EX_WIN64)
                    ++fpr.regcnt;       // Win64 advances both counters together
                ++fpr.xmmcnt;
                goto Lnext;
            }
        }
        // Failed to allocate to a register
        if (j == 1)
        {   /* Unwind first preg1 assignment, because it's both or nothing
             */
            *preg1 = NOREG;
            fpr.regcnt = regcntsave;
            fpr.xmmcnt = xmmcntsave;
        }
        return 0;

     Lnext:
        if (tybasic(ty2) == TYMAX)
            break;
        preg = preg2;
        t = t2;
        ty = ty2;
    }
    return 1;
}

/***************************************
 * Finds replacement types for register passing of aggregates.
 * Params:
 *      t = type to examine; nothing is done if it is null or not an aggregate
 *      arg1type = set to the replacement type for the first register, or null
 *      arg2type = set to the replacement type for the second register, or null
 */
void argtypes(type* t, ref type* arg1type, ref type* arg2type)
{
    if (!t) return;

    tym_t ty = t.Tty;

    if (!tyaggregate(ty))
        return;

    arg1type = arg2type = null;

    if (tybasic(ty) == TYarray)
    {
        size_t sz = cast(size_t) type_size(t);
        if (sz == 0)
            return;

        // 32 bit and Win64 only take sizes that are a power of 2
        if ((I32 || config.exe == EX_WIN64) && (sz & (sz - 1)))
            return;

        if (config.exe == EX_WIN64 && sz > REGSIZE)
            return;

        if (sz <= 2 * REGSIZE)
        {
            // Default: integer type(s) just large enough for each register's share
            type** argtype = &arg1type;
            size_t argsz = sz < REGSIZE ? sz : REGSIZE;
            foreach (v; 0 .. (sz > REGSIZE) + 1)
            {
                *argtype = argsz == 1 ? tstypes[TYchar]
                         : argsz == 2 ? tstypes[TYshort]
                         : argsz <= 4 ? tstypes[TYlong]
                         : tstypes[TYllong];
                argtype = &arg2type;
                argsz = sz - REGSIZE;
            }
        }

        if (I64 && config.exe != EX_WIN64)
        {
            // Find the innermost element type
            // NOTE(review): the first test compares the unmasked Tty while later
            // iterations compare tybasic(Tty) - confirm that is intended.
            type* tn = t.Tnext;
            tym_t tyn = tn.Tty;
            while (tyn == TYarray)
            {
                tn = tn.Tnext;
                assert(tn);
                tyn = tybasic(tn.Tty);
            }

            if (tybasic(tyn) == TYstruct)
            {
                if (type_size(tn) == sz) // array(s) of size 1
                {
                    arg1type = tn.Ttag.Sstruct.Sarg1type;
                    arg2type = tn.Ttag.Sstruct.Sarg2type;
                    return;
                }

                type* t1 = tn.Ttag.Sstruct.Sarg1type;
                if (t1)
                {
                    tn = t1;
                    tyn = tn.Tty;
                }
            }

            if (sz == tysize(tyn))
            {
                if (tysimd(tyn))
                {
                    type* ts = type_fake(tybasic(tyn));
                    ts.Tcount = 1;
                    arg1type = ts;
                    return;
                }
                else if (tybasic(tyn) == TYldouble || tybasic(tyn) == TYildouble)
                {
                    arg1type = tstypes[tybasic(tyn)];
                    return;
                }
            }

            if (sz <= 16)
            {
                if (tyfloating(tyn))
                {
                    arg1type = sz <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                    if (sz > 8)
                        arg2type = (sz - 8) <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                }
            }
        }
    }
    else if (tybasic(ty) == TYstruct)
    {
        // TODO: Move code from `cgelem.d:elstruct()` here
    }
}

/*****************************************
 * Helper for cdfunc(): generate code to save each register in tosave, and
 * the matching code to restore it.
 * Params:
 *      cdbsave    = receives the save code
 *      cdbrestore = receives the restore code
 *      tosave     = mask of registers to save and restore
 *      saved      = every register saved is added to this mask
 *      keepmsk    = every register saved is removed from this mask
 */
private extern (D) void cdfunc_saveRegs(ref CodeBuilder cdbsave, ref CodeBuilder cdbrestore,
        regm_t tosave, ref regm_t saved, ref regm_t keepmsk)
{
    for (reg_t j = 0; tosave; j++)
    {
        regm_t mi = mask(j);
        assert(j <= XMM7);
        if (mi & tosave)
        {
            uint idx;
            regsave.save(cdbsave, j, &idx);
            regsave.restore(cdbrestore, j, idx);
            saved |= mi;
            keepmsk &= ~mi;             // don't need to keep these for rest of params
            tosave &= ~mi;
        }
    }
}

/*******************************
 * Generate code sequence for function call.
 * Decides which parameters go in registers and which on the stack, evaluates
 * them, then hands over to funccall() for the call itself.
 * Params:
 *      cdb = code sink
 *      e = the call; e.EV.E1 is the function, and for binary operators
 *          e.EV.E2 is the parameter tree
 *      pretregs = where the return value goes, passed on to funccall()
 */

void cdfunc(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    //printf("cdfunc()\n"); elem_print(e);
    assert(e);
    uint numpara = 0;               // bytes of parameters
    uint numalign = 0;              // bytes to align stack before pushing parameters
    uint stackpushsave = stackpush;            // so we can compute # of parameters
    cgstate.stackclean++;
    regm_t keepmsk = 0;
    int xmmcnt = 0;
    tym_t tyf = tybasic(e.EV.E1.Ety);        // the function type

    // Easier to deal with parameters as an array: parameters[0..np]
    int np = OTbinary(e.Eoper) ? el_nparams(e.EV.E2) : 0;
    Parameter *parameters = cast(Parameter *)alloca(np * Parameter.sizeof);

    if (np)
    {
        int n = 0;
        fillParameters(e.EV.E2, parameters, &n);
        assert(n == np);
    }

    Symbol *sf = null;                  // symbol of the function being called
    if (e.EV.E1.Eoper == OPvar)
        sf = e.EV.E1.EV.Vsym;

    /* Assume called function access statics
     */
    if (config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_FREEBSD | EX_FREEBSD64) &&
        config.flags3 & CFG3pic)
        cgstate.accessedTLS = true;

    /* Special handling for call to __tls_get_addr, we must save registers
     * before evaluating the parameter, so that the parameter load and call
     * are adjacent.
     */
    if (np == 1 && sf)
    {
        if (sf == tls_get_addr_sym)
            getregs(cdb, ~sf.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
    }

    uint stackalign = REGSIZE;
    if (tyf == TYf16func)
        stackalign = 2;
    // Figure out which parameters go in registers.
    // Compute numpara, the total bytes pushed on the stack
    FuncParamRegs fpr = FuncParamRegs_create(tyf);
    for (int i = np; --i >= 0;)
    {
        elem *ep = parameters[i].e;
        uint psize = cast(uint)_align(stackalign, paramsize(ep, tyf));     // align on stack boundary
        if (config.exe == EX_WIN64)
        {
            //printf("[%d] size = %u, numpara = %d ep = %p ", i, psize, numpara, ep); WRTYxx(ep.Ety); printf("\n");
            debug
            if (psize > REGSIZE) elem_print(e);

            assert(psize <= REGSIZE);
            psize = REGSIZE;
        }
        //printf("[%d] size = %u, numpara = %d ", i, psize, numpara); WRTYxx(ep.Ety); printf("\n");
        if (FuncParamRegs_alloc(fpr, ep.ET, ep.Ety, &parameters[i].reg, &parameters[i].reg2))
        {
            if (config.exe == EX_WIN64)
                numpara += REGSIZE;             // allocate stack space for it anyway
            continue;   // goes in register, not stack
        }

        // Parameter i goes on the stack
        parameters[i].reg = NOREG;
        uint alignsize = el_alignsize(ep);
        parameters[i].numalign = 0;
        if (alignsize > stackalign &&
            (I64 || (alignsize >= 16 &&
                (config.exe & (EX_OSX | EX_LINUX) && (tyaggregate(ep.Ety) || tyvector(ep.Ety))))))
        {
            if (alignsize > STACKALIGN)
            {
                STACKALIGN = alignsize;
                enforcealign = true;
            }
            // Round numpara up to the parameter's alignment
            uint newnumpara = (numpara + (alignsize - 1)) & ~(alignsize - 1);
            parameters[i].numalign = newnumpara - numpara;
            numpara = newnumpara;
            assert(config.exe != EX_WIN64);
        }
        numpara += psize;
    }

    if (config.exe == EX_WIN64)
    {
        if (numpara < 4 * REGSIZE)
            numpara = 4 * REGSIZE;
    }

    //printf("numpara = %d, stackpush = %d\n", numpara, stackpush);
    assert((numpara & (REGSIZE - 1)) == 0);
    assert((stackpush & (REGSIZE - 1)) == 0);

    /* Should consider reordering the order of evaluation of the parameters
     * so that args that go into registers are evaluated after args that get
     * pushed. We can reorder args that are constants or relconst's.
     */

    /* Determine if we should use cgstate.funcarg for the parameters or push them
     */
    bool usefuncarg = false;
    static if (0)
    {
    printf("test1 %d %d %d %d %d %d %d %d\n", (config.flags4 & CFG4speed)!=0, !Alloca.size,
        !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)),
        cast(int)numpara, !stackpush,
        (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos),
        (!typfunc(tyf) || sf && sf.Sflags & SFLexit), !I16);
    }
    if (config.flags4 & CFG4speed &&
        !Alloca.size &&
        /* The cleanup code calls a local function, leaving the return address on
         * the top of the stack. If parameters are placed there, the return address
         * is stepped on.
         * A better solution is turn this off only inside the cleanup code.
         */
        !usednteh &&
        !calledFinally &&
        (numpara || config.exe == EX_WIN64) &&
        stackpush == 0 &&               // cgstate.funcarg needs to be at top of stack
        (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos) &&
        (!(typfunc(tyf) || tyf == TYhfunc) || sf && sf.Sflags & SFLexit) &&
        !anyiasm && !I16
       )
    {
        // These kinds of stack parameter rule out using cgstate.funcarg
        for (int i = 0; i < np; i++)
        {
            elem* ep = parameters[i].e;
            int preg = parameters[i].reg;
            //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
            if (preg == NOREG)
            {
                switch (ep.Eoper)
                {
                    case OPstrctor:
                    case OPstrthis:
                    case OPstrpar:
                    case OPnp_fp:
                        goto Lno;

                    default:
                        break;
                }
            }
        }

        if (numpara > cgstate.funcarg.size)
        {   // New high water mark
            //printf("increasing size from %d to %d\n", (int)cgstate.funcarg.size, (int)numpara);
            cgstate.funcarg.size = numpara;
        }
        usefuncarg = true;
    }
  Lno:

    /* Adjust start of the stack so after all args are pushed,
     * the stack will be aligned.
     */
    if (!usefuncarg && STACKALIGN >= 16 && (numpara + stackpush) & (STACKALIGN - 1))
    {
        numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1));
        cod3_stackadj(cdb, numalign);
        cdb.genadjesp(numalign);
        stackpush += numalign;
        stackpushsave += numalign;
    }
    assert(stackpush == stackpushsave);
    if (config.exe == EX_WIN64)
    {
        //printf("np = %d, numpara = %d, stackpush = %d\n", np, numpara, stackpush);
        assert(numpara == ((np < 4) ? 4 * REGSIZE : np * REGSIZE));

        // Allocate stack space for four entries anyway
        // http://msdn.microsoft.com/en-US/library/ew5tede7(v=vs.80)
    }

    CodeBuilder cdbrestore;             // collects the restores for registers saved below
    cdbrestore.ctor();
    regm_t saved = 0;
    targ_size_t funcargtossave = cgstate.funcargtos;
    targ_size_t funcargtos = numpara;
    //printf("funcargtos1 = %d\n", cast(int)funcargtos);

    /* Parameters go into the registers RDI,RSI,RDX,RCX,R8,R9
     * float and double parameters go into XMM0..XMM7
     * For variadic functions, count of XMM registers used goes in AL
     */
    for (int i = 0; i < np; i++)
    {
        elem* ep = parameters[i].e;
        int preg = parameters[i].reg;
        //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
        if (preg == NOREG)
        {
            /* Push parameter on stack, but keep track of registers used
             * in the process. If they interfere with keepmsk, we'll have
             * to save/restore them.
             */
            CodeBuilder cdbsave;
            cdbsave.ctor();
            regm_t overlap = msavereg & keepmsk;
            msavereg |= keepmsk;
            CodeBuilder cdbparams;
            cdbparams.ctor();
            if (usefuncarg)
                movParams(cdbparams, ep, stackalign, cast(uint)funcargtos, tyf);
            else
                pushParams(cdbparams,ep,stackalign, tyf);
            regm_t tosave = keepmsk & ~msavereg;
            msavereg &= ~keepmsk | overlap;

            // tosave is the mask to save and restore
            cdfunc_saveRegs(cdbsave, cdbrestore, tosave, saved, keepmsk);

            // The saves must come before the code that evaluates the parameter
            cdb.append(cdbsave);
            cdb.append(cdbparams);

            // Alignment for parameter comes after it got pushed
            const uint numalignx = parameters[i].numalign;
            if (usefuncarg)
            {
                funcargtos -= _align(stackalign, paramsize(ep, tyf)) + numalignx;
                cgstate.funcargtos = funcargtos;
            }
            else if (numalignx)
            {
                cod3_stackadj(cdb, numalignx);
                cdb.genadjesp(numalignx);
                stackpush += numalignx;
            }
        }
        else
        {
            // Goes in register preg, not stack
            regm_t retregs = mask(preg);
            if (retregs & XMMREGS)
                ++xmmcnt;
            int preg2 = parameters[i].reg2;
            reg_t mreg,lreg;
            if (preg2 != NOREG || tybasic(ep.Ety) == TYcfloat)
            {
                assert(ep.Eoper != OPstrthis);
                if (mask(preg2) & XMMREGS)
                    ++xmmcnt;

                // Pick the registers [mreg,lreg] the value is evaluated into
                if (tybasic(ep.Ety) == TYcfloat)
                {
                    lreg = ST01;
                    mreg = NOREG;
                }
                else if (tyrelax(ep.Ety) == TYcent)
                {
                    lreg = mask(preg ) & mLSW ? cast(reg_t)preg  : AX;
                    mreg = mask(preg2) & mMSW ? cast(reg_t)preg2 : DX;
                }
                else
                {
                    lreg = XMM0;
                    mreg = XMM1;
                }
                retregs = (mask(mreg) | mask(lreg)) & ~mask(NOREG);
                CodeBuilder cdbsave;
                cdbsave.ctor();
                if (keepmsk & retregs)
                {
                    regm_t tosave = keepmsk & retregs;

                    // tosave is the mask to save and restore
                    cdfunc_saveRegs(cdbsave, cdbrestore, tosave, saved, keepmsk);
                }
                cdb.append(cdbsave);

                scodelem(cdb, ep, &retregs, keepmsk, false);

                // Move result [mreg,lreg] into parameter registers from [preg2,preg]
                retregs = 0;
                if (preg != lreg)
                    retregs |= mask(preg);
                if (preg2 != mreg)
                    retregs |= mask(preg2);
                retregs &= ~mask(NOREG);
                getregs(cdb,retregs);

                // Types used for the two register moves
                tym_t ty1 = tybasic(ep.Ety);
                tym_t ty2 = ty1;
                if (ep.Ety & mTYgprxmm)
                {
                    ty1 = TYllong;
                    ty2 = TYdouble;
                }
                else if (ep.Ety & mTYxmmgpr)
                {
                    ty1 = TYdouble;
                    ty2 = TYllong;
                }
                else if (ty1 == TYstruct)
                {
                    type* targ1 = ep.ET.Ttag.Sstruct.Sarg1type;
                    type* targ2 = ep.ET.Ttag.Sstruct.Sarg2type;
                    if (targ1)
                        ty1 = targ1.Tty;
                    if (targ2)
                        ty2 = targ2.Tty;
                }
                else if (tyrelax(ty1) == TYcent)
                    ty1 = ty2 = TYllong;
                else if (tybasic(ty1) == TYcdouble)
                    ty1 = ty2 = TYdouble;

                if (tybasic(ep.Ety) == TYcfloat)
                {
                    assert(I64);
                    assert(lreg == ST01 && mreg == NOREG);
                    // spill
                    pop87();
                    pop87();
                    cdb.genfltreg(0xD9, 3, tysize(TYfloat));
                    genfwait(cdb);
                    cdb.genfltreg(0xD9, 3, 0);
                    genfwait(cdb);
                    // reload
                    if (config.exe == EX_WIN64)
                    {
                        cdb.genfltreg(LOD, preg, 0);
                        code_orrex(cdb.last(), REX_W);
                    }
                    else
                    {
                        assert(mask(preg) & XMMREGS);
                        cdb.genxmmreg(xmmload(TYdouble), cast(reg_t) preg, 0, TYdouble);
                    }
                }
                else foreach (v; 0 .. 2)
                {
                    // The test on (preg != mreg) selects which of the two moves goes first
                    if (v ^ (preg != mreg))
                        genmovreg(cdb, preg, lreg, ty1);
                    else
                        genmovreg(cdb, preg2, mreg, ty2);
                }

                retregs = (mask(preg) | mask(preg2)) & ~mask(NOREG);
            }
            else if (ep.Eoper == OPstrthis)
            {
                getregs(cdb,retregs);
                // LEA preg,np[RSP]
                uint delta = stackpush - ep.EV.Vuns;   // stack delta to parameter
                cdb.genc1(LEA,
                        (modregrm(0,4,SP) << 8) | modregxrm(2,preg,4), FLconst,delta);
                if (I64)
                    code_orrex(cdb.last(), REX_W);
            }
            else if (ep.Eoper == OPstrpar && config.exe == EX_WIN64 && type_size(ep.ET) == 0)
            {
                // Zero sized struct: evaluate the operand without asking for a result
                retregs = 0;
                scodelem(cdb, ep.EV.E1, &retregs, keepmsk, false);
                freenode(ep);
            }
            else
            {
                scodelem(cdb, ep, &retregs, keepmsk, false);
            }
            keepmsk |= retregs;      // don't change preg when evaluating func address
        }
    }

    if (config.exe == EX_WIN64)
    {   // Allocate stack space for four entries anyway
        // http://msdn.microsoft.com/en-US/library/ew5tede7(v=vs.80)
        {   uint sz = 4 * REGSIZE;
            if (usefuncarg)
            {
                funcargtos -= sz;
                cgstate.funcargtos = funcargtos;
            }
            else
            {
                cod3_stackadj(cdb, sz);
                cdb.genadjesp(sz);
                stackpush += sz;
            }
        }

        /* Variadic functions store XMM parameters into their corresponding GP registers.
         * Note there is no test for a variadic callee here: the copy is made
         * for every XMM parameter.
         */
        for (int i = 0; i < np; i++)
        {
            int preg = parameters[i].reg;
            regm_t retregs = mask(preg);
            if (retregs & XMMREGS)
            {
                reg_t reg;
                switch (preg)
                {
                    case XMM0: reg = CX; break;
                    case XMM1: reg = DX; break;
                    case XMM2: reg = R8; break;
                    case XMM3: reg = R9; break;

                    default:   assert(0);
                }
                getregs(cdb,mask(reg));
                cdb.gen2(STOD,(REX_W << 16) | modregxrmx(3,preg-XMM0,reg)); // MOVD reg,preg
            }
        }
    }

    // Restore any register parameters we saved
    getregs(cdb,saved);
    cdb.append(cdbrestore);
    keepmsk |= saved;

    // Variadic functions store the number of XMM registers used in AL
    if (I64 && config.exe != EX_WIN64 && e.Eflags & EFLAGS_variadic)
    {
        getregs(cdb,mAX);
        movregconst(cdb,AX,xmmcnt,1);
        keepmsk |= mAX;
    }

    //printf("funcargtos2 = %d\n", (int)funcargtos);
    assert(!usefuncarg || (funcargtos == 0 && cgstate.funcargtos == 0));
    cgstate.stackclean--;

    debug
    if (!usefuncarg && numpara != stackpush - stackpushsave)
    {
        printf("function %s\n", funcsym_p.Sident.ptr);
        printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave);
        elem_print(e);
    }

    assert(usefuncarg || numpara == stackpush - stackpushsave);

    funccall(cdb,e,numpara,numalign,pretregs,keepmsk,usefuncarg);
    cgstate.funcargtos = funcargtossave;
}

/***********************************
 * Generate code that loads a register with the address of a location on
 * the stack, e.EV.Vuns being the value of stackpush that identifies it.
 * Params:
 *      cdb = code sink
 *      e = the elem; its type must be REGSIZE bytes
 *      pretregs = where the result goes
 */

void cdstrthis(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    assert(tysize(e.Ety) == REGSIZE);
    const reg = findreg(*pretregs & allregs);
    getregs(cdb,mask(reg));
    // LEA reg,np[ESP]
    uint np = stackpush - e.EV.Vuns;        // stack delta to parameter
    cdb.genc1(LEA,(modregrm(0,4,SP) << 8) | modregxrm(2,reg,4),FLconst,np);
    if (I64)
        code_orrex(cdb.last(), REX_W);
    fixresult(cdb, e, mask(reg), pretregs);
}

/******************************
 * Call function. All parameters have already been pushed onto the stack.
3755 * Params: 3756 * e = function call 3757 * numpara = size in bytes of all the parameters 3758 * numalign = amount the stack was aligned by before the parameters were pushed 3759 * pretregs = where return value goes 3760 * keepmsk = registers to not change when evaluating the function address 3761 * usefuncarg = using cgstate.funcarg, so no need to adjust stack after func return 3762 */ 3763 3764 private void funccall(ref CodeBuilder cdb, elem* e, uint numpara, uint numalign, 3765 regm_t* pretregs,regm_t keepmsk, bool usefuncarg) 3766 { 3767 //printf("%s ", funcsym_p.Sident.ptr); 3768 //printf("funccall(e = %p, *pretregs = %s, numpara = %d, numalign = %d, usefuncarg=%d)\n",e,regm_str(*pretregs),numpara,numalign,usefuncarg); 3769 calledafunc = 1; 3770 // Determine if we need frame for function prolog/epilog 3771 3772 static if (TARGET_WINDOS) 3773 { 3774 if (config.memmodel == Vmodel) 3775 { 3776 if (tyfarfunc(funcsym_p.ty())) 3777 needframe = true; 3778 } 3779 } 3780 3781 code cs; 3782 regm_t retregs; 3783 Symbol* s; 3784 3785 elem* e1 = e.EV.E1; 3786 tym_t tym1 = tybasic(e1.Ety); 3787 char farfunc = tyfarfunc(tym1) || tym1 == TYifunc; 3788 3789 CodeBuilder cdbe; 3790 cdbe.ctor(); 3791 3792 if (e1.Eoper == OPvar) 3793 { // Call function directly 3794 3795 if (!tyfunc(tym1)) 3796 WRTYxx(tym1); 3797 assert(tyfunc(tym1)); 3798 s = e1.EV.Vsym; 3799 if (s.Sflags & SFLexit) 3800 { } 3801 else if (s != tls_get_addr_sym) 3802 save87(cdb); // assume 8087 regs are all trashed 3803 3804 // Function calls may throw Errors, unless marked that they don't 3805 if (s == funcsym_p || !s.Sfunc || !(s.Sfunc.Fflags3 & Fnothrow)) 3806 funcsym_p.Sfunc.Fflags3 &= ~Fnothrow; 3807 3808 if (s.Sflags & SFLexit) 3809 { 3810 // Function doesn't return, so don't worry about registers 3811 // it may use 3812 } 3813 else if (!tyfunc(s.ty()) || !(config.flags4 & CFG4optimized)) 3814 // so we can replace func at runtime 3815 getregs(cdbe,~fregsaved & (mBP | ALLREGS | mES | XMMREGS)); 3816 else 
3817 getregs(cdbe,~s.Sregsaved & (mBP | ALLREGS | mES | XMMREGS)); 3818 if (strcmp(s.Sident.ptr, "alloca") == 0) 3819 { 3820 s = getRtlsym(RTLSYM_ALLOCA); 3821 makeitextern(s); 3822 int areg = CX; 3823 if (config.exe == EX_WIN64) 3824 areg = DX; 3825 getregs(cdbe, mask(areg)); 3826 cdbe.genc(LEA, modregrm(2, areg, BPRM), FLallocatmp, 0, 0, 0); // LEA areg,&localsize[BP] 3827 if (I64) 3828 code_orrex(cdbe.last(), REX_W); 3829 Alloca.size = REGSIZE; 3830 } 3831 if (sytab[s.Sclass] & SCSS) // if function is on stack (!) 3832 { 3833 retregs = allregs & ~keepmsk; 3834 s.Sflags &= ~GTregcand; 3835 s.Sflags |= SFLread; 3836 cdrelconst(cdbe,e1,&retregs); 3837 if (farfunc) 3838 { 3839 const reg = findregmsw(retregs); 3840 const lsreg = findreglsw(retregs); 3841 floatreg = true; // use float register 3842 reflocal = true; 3843 cdbe.genc1(0x89, // MOV floatreg+2,reg 3844 modregrm(2, reg, BPRM), FLfltreg, REGSIZE); 3845 cdbe.genc1(0x89, // MOV floatreg,lsreg 3846 modregrm(2, lsreg, BPRM), FLfltreg, 0); 3847 if (tym1 == TYifunc) 3848 cdbe.gen1(0x9C); // PUSHF 3849 cdbe.genc1(0xFF, // CALL [floatreg] 3850 modregrm(2, 3, BPRM), FLfltreg, 0); 3851 } 3852 else 3853 { 3854 const reg = findreg(retregs); 3855 cdbe.gen2(0xFF, modregrmx(3, 2, reg)); // CALL reg 3856 if (I64) 3857 code_orrex(cdbe.last(), REX_W); 3858 } 3859 } 3860 else 3861 { 3862 int fl = FLfunc; 3863 if (!tyfunc(s.ty())) 3864 fl = el_fl(e1); 3865 if (tym1 == TYifunc) 3866 cdbe.gen1(0x9C); // PUSHF 3867 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 3868 { 3869 assert(!farfunc); 3870 if (s != tls_get_addr_sym) 3871 { 3872 //printf("call %s\n", s.Sident.ptr); 3873 load_localgot(cdb); 3874 cdbe.gencs(0xE8, 0, fl, s); // CALL extern 3875 } 3876 else if (I64) 3877 { 3878 /* Prepend 66 66 48 so GNU linker has patch room 3879 */ 3880 assert(!farfunc); 3881 cdbe.gen1(0x66); 3882 cdbe.gen1(0x66); 3883 cdbe.gencs(0xE8, 0, fl, s); // CALL extern 3884 cdbe.last().Irex = REX | 
REX_W; 3885 } 3886 else 3887 cdbe.gencs(0xE8, 0, fl, s); // CALL extern 3888 } 3889 else 3890 { 3891 cdbe.gencs(farfunc ? 0x9A : 0xE8,0,fl,s); // CALL extern 3892 } 3893 code_orflag(cdbe.last(), farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff)); 3894 } 3895 } 3896 else 3897 { // Call function via pointer 3898 3899 // Function calls may throw Errors 3900 funcsym_p.Sfunc.Fflags3 &= ~Fnothrow; 3901 3902 if (e1.Eoper != OPind) { WRFL(cast(FL)el_fl(e1)); WROP(e1.Eoper); } 3903 save87(cdb); // assume 8087 regs are all trashed 3904 assert(e1.Eoper == OPind); 3905 elem *e11 = e1.EV.E1; 3906 tym_t e11ty = tybasic(e11.Ety); 3907 assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr))); 3908 load_localgot(cdb); 3909 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 3910 { 3911 if (config.flags3 & CFG3pic && I32) 3912 keepmsk |= mBX; 3913 } 3914 3915 /* Mask of registers destroyed by the function call 3916 */ 3917 regm_t desmsk = (mBP | ALLREGS | mES | XMMREGS) & ~fregsaved; 3918 3919 // if we can't use loadea() 3920 if ((!OTleaf(e11.Eoper) || e11.Eoper == OPconst) && 3921 (e11.Eoper != OPind || e11.Ecount)) 3922 { 3923 retregs = allregs & ~keepmsk; 3924 cgstate.stackclean++; 3925 scodelem(cdbe,e11,&retregs,keepmsk,true); 3926 cgstate.stackclean--; 3927 // Kill registers destroyed by an arbitrary function call 3928 getregs(cdbe,desmsk); 3929 if (e11ty == TYfptr) 3930 { 3931 const reg = findregmsw(retregs); 3932 const lsreg = findreglsw(retregs); 3933 floatreg = true; // use float register 3934 reflocal = true; 3935 cdbe.genc1(0x89, // MOV floatreg+2,reg 3936 modregrm(2, reg, BPRM), FLfltreg, REGSIZE); 3937 cdbe.genc1(0x89, // MOV floatreg,lsreg 3938 modregrm(2, lsreg, BPRM), FLfltreg, 0); 3939 if (tym1 == TYifunc) 3940 cdbe.gen1(0x9C); // PUSHF 3941 cdbe.genc1(0xFF, // CALL [floatreg] 3942 modregrm(2, 3, BPRM), FLfltreg, 0); 3943 } 3944 else 3945 { 3946 const reg = findreg(retregs); 3947 cdbe.gen2(0xFF, modregrmx(3, 2, reg)); // 
CALL reg 3948 if (I64) 3949 code_orrex(cdbe.last(), REX_W); 3950 } 3951 } 3952 else 3953 { 3954 if (tym1 == TYifunc) 3955 cdb.gen1(0x9C); // PUSHF 3956 // CALL [function] 3957 cs.Iflags = 0; 3958 cgstate.stackclean++; 3959 loadea(cdbe, e11, &cs, 0xFF, farfunc ? 3 : 2, 0, keepmsk, desmsk); 3960 cgstate.stackclean--; 3961 freenode(e11); 3962 } 3963 s = null; 3964 } 3965 cdb.append(cdbe); 3966 freenode(e1); 3967 3968 /* See if we will need the frame pointer. 3969 Calculate it here so we can possibly use BP to fix the stack. 3970 */ 3971 static if (0) 3972 { 3973 if (!needframe) 3974 { 3975 // If there is a register available for this basic block 3976 if (config.flags4 & CFG4optimized && (ALLREGS & ~regcon.used)) 3977 { } 3978 else 3979 { 3980 for (SYMIDX si = 0; si < globsym.length; si++) 3981 { 3982 Symbol* s = globsym[si]; 3983 3984 if (s.Sflags & GTregcand && type_size(s.Stype) != 0) 3985 { 3986 if (config.flags4 & CFG4optimized) 3987 { // If symbol is live in this basic block and 3988 // isn't already in a register 3989 if (s.Srange && vec_testbit(dfoidx, s.Srange) && 3990 s.Sfl != FLreg) 3991 { // Then symbol must be allocated on stack 3992 needframe = true; 3993 break; 3994 } 3995 } 3996 else 3997 { if (mfuncreg == 0) // if no registers left 3998 { needframe = true; 3999 break; 4000 } 4001 } 4002 } 4003 } 4004 } 4005 } 4006 } 4007 4008 reg_t reg1 = NOREG, reg2 = NOREG; 4009 4010 if (config.exe == EX_WIN64) // Win64 is currently broken 4011 retregs = regmask(e.Ety, tym1); 4012 else 4013 retregs = allocretregs(e.Ety, e.ET, tym1, ®1, ®2); 4014 4015 assert(retregs || !*pretregs); 4016 4017 if (!usefuncarg) 4018 { 4019 // If stack needs cleanup 4020 if (s && s.Sflags & SFLexit) 4021 { 4022 if (config.fulltypes && TARGET_WINDOS) 4023 { 4024 // the stack walker evaluates the return address, not a byte of the 4025 // call instruction, so ensure there is an instruction byte after 4026 // the call that still has the same line number information 4027 
cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3); 4028 } 4029 /* Function never returns, so don't need to generate stack 4030 * cleanup code. But still need to log the stack cleanup 4031 * as if it did return. 4032 */ 4033 cdb.genadjesp(-(numpara + numalign)); 4034 stackpush -= numpara + numalign; 4035 } 4036 else if ((OTbinary(e.Eoper) || config.exe == EX_WIN64) && 4037 (!typfunc(tym1) || config.exe == EX_WIN64)) 4038 { 4039 if (tym1 == TYhfunc) 4040 { // Hidden parameter is popped off by the callee 4041 cdb.genadjesp(-REGSIZE); 4042 stackpush -= REGSIZE; 4043 if (numpara + numalign > REGSIZE) 4044 genstackclean(cdb, numpara + numalign - REGSIZE, retregs); 4045 } 4046 else 4047 genstackclean(cdb, numpara + numalign, retregs); 4048 } 4049 else 4050 { 4051 cdb.genadjesp(-numpara); // popped off by the callee's 'RET numpara' 4052 stackpush -= numpara; 4053 if (numalign) // callee doesn't know about alignment adjustment 4054 genstackclean(cdb,numalign,retregs); 4055 } 4056 } 4057 4058 /* Special handling for functions which return a floating point 4059 value in the top of the 8087 stack. 
4060 */ 4061 4062 if (retregs & mST0) 4063 { 4064 cdb.genadjfpu(1); 4065 if (*pretregs) // if we want the result 4066 { 4067 //assert(global87.stackused == 0); 4068 push87(cdb); // one item on 8087 stack 4069 fixresult87(cdb,e,retregs,pretregs); 4070 return; 4071 } 4072 else 4073 // Pop unused result off 8087 stack 4074 cdb.gen2(0xDD, modregrm(3, 3, 0)); // FPOP 4075 } 4076 else if (retregs & mST01) 4077 { 4078 cdb.genadjfpu(2); 4079 if (*pretregs) // if we want the result 4080 { 4081 assert(global87.stackused == 0); 4082 push87(cdb); 4083 push87(cdb); // two items on 8087 stack 4084 fixresult_complex87(cdb, e, retregs, pretregs); 4085 return; 4086 } 4087 else 4088 { 4089 // Pop unused result off 8087 stack 4090 cdb.gen2(0xDD, modregrm(3, 3, 0)); // FPOP 4091 cdb.gen2(0xDD, modregrm(3, 3, 0)); // FPOP 4092 } 4093 } 4094 4095 /* Special handling for functions that return one part 4096 in XMM0 and the other part in AX 4097 */ 4098 if (*pretregs && retregs) 4099 { 4100 if (reg1 == NOREG || reg2 == NOREG) 4101 {} 4102 else if ((0 == (mask(reg1) & XMMREGS)) ^ (0 == (mask(reg2) & XMMREGS))) 4103 { 4104 reg_t lreg, mreg; 4105 if (mask(reg1) & XMMREGS) 4106 { 4107 lreg = XMM0; 4108 mreg = XMM1; 4109 } 4110 else 4111 { 4112 lreg = mask(reg1) & mLSW ? reg1 : AX; 4113 mreg = mask(reg2) & mMSW ? reg2 : DX; 4114 } 4115 for (int v = 0; v < 2; v++) 4116 { 4117 if (v ^ (reg2 != lreg)) 4118 genmovreg(cdb,lreg,reg1); 4119 else 4120 genmovreg(cdb,mreg,reg2); 4121 } 4122 retregs = mask(lreg) | mask(mreg); 4123 } 4124 } 4125 4126 /* Special handling for functions which return complex float in XMM0 or RAX. 
*/ 4127 4128 if (I64 4129 && config.exe != EX_WIN64 // broken 4130 && *pretregs && tybasic(e.Ety) == TYcfloat) 4131 { 4132 assert(reg2 == NOREG); 4133 // spill 4134 if (config.exe == EX_WIN64) 4135 { 4136 assert(reg1 == AX); 4137 cdb.genfltreg(STO, reg1, 0); 4138 code_orrex(cdb.last(), REX_W); 4139 } 4140 else 4141 { 4142 assert(reg1 == XMM0); 4143 cdb.genxmmreg(xmmstore(TYdouble), reg1, 0, TYdouble); 4144 } 4145 // reload real 4146 push87(cdb); 4147 cdb.genfltreg(0xD9, 0, 0); 4148 genfwait(cdb); 4149 // reload imaginary 4150 push87(cdb); 4151 cdb.genfltreg(0xD9, 0, tysize(TYfloat)); 4152 genfwait(cdb); 4153 4154 retregs = mST01; 4155 } 4156 4157 fixresult(cdb, e, retregs, pretregs); 4158 } 4159 4160 /*************************** 4161 * Determine size of argument e that will be pushed. 4162 */ 4163 4164 targ_size_t paramsize(elem* e, tym_t tyf) 4165 { 4166 assert(e.Eoper != OPparam); 4167 targ_size_t szb; 4168 tym_t tym = tybasic(e.Ety); 4169 if (tyscalar(tym)) 4170 szb = size(tym); 4171 else if (tym == TYstruct || tym == TYarray) 4172 szb = type_parameterSize(e.ET, tyf); 4173 else 4174 { 4175 WRTYxx(tym); 4176 assert(0); 4177 } 4178 return szb; 4179 } 4180 4181 /*************************** 4182 * Generate code to move argument e on the stack. 4183 */ 4184 4185 private void movParams(ref CodeBuilder cdb, elem* e, uint stackalign, uint funcargtos, tym_t tyf) 4186 { 4187 //printf("movParams(e = %p, stackalign = %d, funcargtos = %d)\n", e, stackalign, funcargtos); 4188 //printf("movParams()\n"); elem_print(e); 4189 assert(!I16); 4190 assert(e && e.Eoper != OPparam); 4191 4192 tym_t tym = tybasic(e.Ety); 4193 if (tyfloating(tym)) 4194 objmod.fltused(); 4195 4196 int grex = I64 ? 
REX_W << 16 : 0; 4197 4198 targ_size_t szb = paramsize(e, tyf); // size before alignment 4199 targ_size_t sz = _align(stackalign, szb); // size after alignment 4200 assert((sz & (stackalign - 1)) == 0); // ensure that alignment worked 4201 assert((sz & (REGSIZE - 1)) == 0); 4202 //printf("szb = %d sz = %d\n", (int)szb, (int)sz); 4203 4204 code cs; 4205 cs.Iflags = 0; 4206 cs.Irex = 0; 4207 switch (e.Eoper) 4208 { 4209 case OPstrctor: 4210 case OPstrthis: 4211 case OPstrpar: 4212 case OPnp_fp: 4213 assert(0); 4214 4215 case OPrelconst: 4216 { 4217 int fl; 4218 if (!evalinregister(e) && 4219 !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) && 4220 ((fl = el_fl(e)) == FLdata || fl == FLudata || fl == FLextern) 4221 ) 4222 { 4223 // MOV -stackoffset[EBP],&variable 4224 cs.Iop = 0xC7; 4225 cs.Irm = modregrm(2,0,BPRM); 4226 if (I64 && sz == 8) 4227 cs.Irex |= REX_W; 4228 cs.IFL1 = FLfuncarg; 4229 cs.IEV1.Voffset = funcargtos - REGSIZE; 4230 cs.IEV2.Voffset = e.EV.Voffset; 4231 cs.IFL2 = cast(ubyte)fl; 4232 cs.IEV2.Vsym = e.EV.Vsym; 4233 cs.Iflags |= CFoff; 4234 cdb.gen(&cs); 4235 return; 4236 } 4237 break; 4238 } 4239 4240 case OPconst: 4241 if (!evalinregister(e)) 4242 { 4243 cs.Iop = (sz == 1) ? 
0xC6 : 0xC7; 4244 cs.Irm = modregrm(2,0,BPRM); 4245 cs.IFL1 = FLfuncarg; 4246 cs.IEV1.Voffset = funcargtos - sz; 4247 cs.IFL2 = FLconst; 4248 targ_size_t *p = cast(targ_size_t *) &(e.EV); 4249 cs.IEV2.Vsize_t = *p; 4250 if (I64 && tym == TYcldouble) 4251 // The alignment of EV.Vcldouble is not the same on the compiler 4252 // as on the target 4253 goto Lbreak; 4254 if (I64 && sz >= 8) 4255 { 4256 int i = cast(int)sz; 4257 do 4258 { 4259 if (*p >= 0x80000000) 4260 { // Use 64 bit register MOV, as the 32 bit one gets sign extended 4261 // MOV reg,imm64 4262 // MOV EA,reg 4263 goto Lbreak; 4264 } 4265 p = cast(targ_size_t *)(cast(char *) p + REGSIZE); 4266 i -= REGSIZE; 4267 } while (i > 0); 4268 p = cast(targ_size_t *) &(e.EV); 4269 } 4270 4271 int i = cast(int)sz; 4272 do 4273 { int regsize = REGSIZE; 4274 regm_t retregs = (sz == 1) ? BYTEREGS : allregs; 4275 reg_t reg; 4276 if (reghasvalue(retregs,*p,®)) 4277 { 4278 cs.Iop = (cs.Iop & 1) | 0x88; 4279 cs.Irm |= modregrm(0, reg & 7, 0); // MOV EA,reg 4280 if (reg & 8) 4281 cs.Irex |= REX_R; 4282 if (I64 && sz == 1 && reg >= 4) 4283 cs.Irex |= REX; 4284 } 4285 if (I64 && sz >= 8) 4286 cs.Irex |= REX_W; 4287 cdb.gen(&cs); // MOV EA,const 4288 4289 p = cast(targ_size_t *)(cast(char *) p + regsize); 4290 cs.Iop = 0xC7; 4291 cs.Irm &= cast(ubyte)~cast(int)modregrm(0, 7, 0); 4292 cs.Irex &= ~REX_R; 4293 cs.IEV1.Voffset += regsize; 4294 cs.IEV2.Vint = cast(targ_int)*p; 4295 i -= regsize; 4296 } while (i > 0); 4297 return; 4298 } 4299 4300 Lbreak: 4301 break; 4302 4303 default: 4304 break; 4305 } 4306 regm_t retregs = tybyte(tym) ? 
BYTEREGS : allregs; 4307 if (tyvector(tym)) 4308 { 4309 retregs = XMMREGS; 4310 codelem(cdb, e, &retregs, false); 4311 const op = xmmstore(tym); 4312 const r = findreg(retregs); 4313 cdb.genc1(op, modregxrm(2, r - XMM0, BPRM), FLfuncarg, funcargtos - 16); // MOV funcarg[EBP],r 4314 checkSetVex(cdb.last(),tym); 4315 return; 4316 } 4317 else if (tyfloating(tym)) 4318 { 4319 if (config.inline8087) 4320 { 4321 retregs = tycomplex(tym) ? mST01 : mST0; 4322 codelem(cdb, e, &retregs, false); 4323 4324 opcode_t op; 4325 uint r; 4326 switch (tym) 4327 { 4328 case TYfloat: 4329 case TYifloat: 4330 case TYcfloat: 4331 op = 0xD9; 4332 r = 3; 4333 break; 4334 4335 case TYdouble: 4336 case TYidouble: 4337 case TYdouble_alias: 4338 case TYcdouble: 4339 op = 0xDD; 4340 r = 3; 4341 break; 4342 4343 case TYldouble: 4344 case TYildouble: 4345 case TYcldouble: 4346 op = 0xDB; 4347 r = 7; 4348 break; 4349 4350 default: 4351 assert(0); 4352 } 4353 if (tycomplex(tym)) 4354 { 4355 // FSTP sz/2[ESP] 4356 cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz/2); 4357 pop87(); 4358 } 4359 pop87(); 4360 cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz); // FSTP -sz[EBP] 4361 return; 4362 } 4363 } 4364 scodelem(cdb, e, &retregs, 0, true); 4365 if (sz <= REGSIZE) 4366 { 4367 uint r = findreg(retregs); 4368 cdb.genc1(0x89, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE); // MOV -REGSIZE[EBP],r 4369 if (sz == 8) 4370 code_orrex(cdb.last(), REX_W); 4371 } 4372 else if (sz == REGSIZE * 2) 4373 { 4374 uint r = findregmsw(retregs); 4375 cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE); // MOV -REGSIZE[EBP],r 4376 r = findreglsw(retregs); 4377 cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE * 2); // MOV -2*REGSIZE[EBP],r 4378 } 4379 else 4380 assert(0); 4381 } 4382 4383 4384 /*************************** 4385 * Generate code to push argument e on the stack. 
4386 * stackpush is incremented by stackalign for each PUSH. 4387 */ 4388 4389 void pushParams(ref CodeBuilder cdb, elem* e, uint stackalign, tym_t tyf) 4390 { 4391 //printf("params(e = %p, stackalign = %d)\n", e, stackalign); 4392 //printf("params()\n"); elem_print(e); 4393 stackchanged = 1; 4394 assert(e && e.Eoper != OPparam); 4395 4396 tym_t tym = tybasic(e.Ety); 4397 if (tyfloating(tym)) 4398 objmod.fltused(); 4399 4400 int grex = I64 ? REX_W << 16 : 0; 4401 4402 targ_size_t szb = paramsize(e, tyf); // size before alignment 4403 targ_size_t sz = _align(stackalign,szb); // size after alignment 4404 assert((sz & (stackalign - 1)) == 0); // ensure that alignment worked 4405 assert((sz & (REGSIZE - 1)) == 0); 4406 4407 switch (e.Eoper) 4408 { 4409 version (SCPP) 4410 { 4411 case OPstrctor: 4412 { 4413 elem* e1 = e.EV.E1; 4414 docommas(cdb,&e1); // skip over any comma expressions 4415 4416 cod3_stackadj(cdb, sz); 4417 stackpush += sz; 4418 cdb.genadjesp(sz); 4419 4420 // Find OPstrthis and set it to stackpush 4421 exp2_setstrthis(e1, null, stackpush, null); 4422 4423 regm_t retregs = 0; 4424 codelem(cdb, e1, &retregs, true); 4425 freenode(e); 4426 return; 4427 } 4428 case OPstrthis: 4429 // This is the parameter for the 'this' pointer corresponding to 4430 // OPstrctor. We push a pointer to an object that was already 4431 // allocated on the stack by OPstrctor. 
4432 { 4433 regm_t retregs = allregs; 4434 reg_t reg; 4435 allocreg(cdb, &retregs, ®, TYoffset); 4436 genregs(cdb, 0x89, SP, reg); // MOV reg,SP 4437 if (I64) 4438 code_orrex(cdb.last(), REX_W); 4439 uint np = stackpush - e.EV.Vuns; // stack delta to parameter 4440 cdb.genc2(0x81, grex | modregrmx(3, 0, reg), np); // ADD reg,np 4441 if (sz > REGSIZE) 4442 { 4443 cdb.gen1(0x16); // PUSH SS 4444 stackpush += REGSIZE; 4445 } 4446 cdb.gen1(0x50 + (reg & 7)); // PUSH reg 4447 if (reg & 8) 4448 code_orrex(cdb.last(), REX_B); 4449 stackpush += REGSIZE; 4450 cdb.genadjesp(sz); 4451 freenode(e); 4452 return; 4453 } 4454 } 4455 4456 case OPstrpar: 4457 { 4458 uint rm; 4459 4460 elem* e1 = e.EV.E1; 4461 if (sz == 0) 4462 { 4463 docommas(cdb, &e1); // skip over any commas 4464 4465 const stackpushsave = stackpush; 4466 const stackcleansave = cgstate.stackclean; 4467 cgstate.stackclean = 0; 4468 4469 regm_t retregs = 0; 4470 codelem(cdb,e1,&retregs,true); 4471 4472 assert(cgstate.stackclean == 0); 4473 cgstate.stackclean = stackcleansave; 4474 genstackclean(cdb,stackpush - stackpushsave,0); 4475 4476 freenode(e); 4477 return; 4478 } 4479 if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1.Eoper == OPvar) 4480 { 4481 freenode(e); 4482 e = e1; 4483 goto L1; 4484 } 4485 docommas(cdb,&e1); // skip over any commas 4486 code_flags_t seg = 0; // assume no seg override 4487 regm_t retregs = sz ? 
IDXREGS : 0; 4488 bool doneoff = false; 4489 uint pushsize = REGSIZE; 4490 uint op16 = 0; 4491 if (!I16 && sz & 2) // if odd number of words to push 4492 { 4493 pushsize = 2; 4494 op16 = 1; 4495 } 4496 else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0) 4497 { 4498 pushsize = 4; // push DWORDs at a time 4499 op16 = 1; 4500 } 4501 uint npushes = cast(uint)(sz / pushsize); 4502 switch (e1.Eoper) 4503 { 4504 case OPind: 4505 if (sz) 4506 { 4507 switch (tybasic(e1.EV.E1.Ety)) 4508 { 4509 case TYfptr: 4510 case TYhptr: 4511 seg = CFes; 4512 retregs |= mES; 4513 break; 4514 4515 case TYsptr: 4516 if (config.wflags & WFssneds) 4517 seg = CFss; 4518 break; 4519 4520 case TYfgPtr: 4521 if (I32) 4522 seg = CFgs; 4523 else if (I64) 4524 seg = CFfs; 4525 else 4526 assert(0); 4527 break; 4528 4529 case TYcptr: 4530 seg = CFcs; 4531 break; 4532 4533 default: 4534 break; 4535 } 4536 } 4537 codelem(cdb, e1.EV.E1, &retregs, false); 4538 freenode(e1); 4539 break; 4540 4541 case OPvar: 4542 /* Symbol is no longer a candidate for a register */ 4543 e1.EV.Vsym.Sflags &= ~GTregcand; 4544 4545 if (!e1.Ecount && npushes > 4) 4546 { 4547 /* Kludge to point at last word in struct. */ 4548 /* Don't screw up CSEs. 
*/ 4549 e1.EV.Voffset += sz - pushsize; 4550 doneoff = true; 4551 } 4552 //if (LARGEDATA) /* if default isn't DS */ 4553 { 4554 static immutable uint[4] segtocf = [ CFes,CFcs,CFss,0 ]; 4555 4556 int fl = el_fl(e1); 4557 if (fl == FLfardata) 4558 { 4559 seg = CFes; 4560 retregs |= mES; 4561 } 4562 else 4563 { 4564 uint s = segfl[fl]; 4565 assert(s < 4); 4566 seg = segtocf[s]; 4567 if (seg == CFss && !(config.wflags & WFssneds)) 4568 seg = 0; 4569 } 4570 } 4571 if (e1.Ety & mTYfar) 4572 { 4573 seg = CFes; 4574 retregs |= mES; 4575 } 4576 cdrelconst(cdb, e1, &retregs); 4577 // Reverse the effect of the previous add 4578 if (doneoff) 4579 e1.EV.Voffset -= sz - pushsize; 4580 freenode(e1); 4581 break; 4582 4583 case OPstreq: 4584 //case OPcond: 4585 if (!(config.exe & EX_flat)) 4586 { 4587 seg = CFes; 4588 retregs |= mES; 4589 } 4590 codelem(cdb, e1, &retregs, false); 4591 break; 4592 4593 case OPpair: 4594 case OPrpair: 4595 pushParams(cdb, e1, stackalign, tyf); 4596 freenode(e); 4597 return; 4598 4599 default: 4600 elem_print(e1); 4601 assert(0); 4602 } 4603 reg_t reg = findreglsw(retregs); 4604 rm = I16 ? regtorm[reg] : regtorm32[reg]; 4605 if (op16) 4606 seg |= CFopsize; // operand size 4607 if (npushes <= 4) 4608 { 4609 assert(!doneoff); 4610 for (; npushes > 1; --npushes) 4611 { 4612 cdb.genc1(0xFF, buildModregrm(2, 6, rm), FLconst, pushsize * (npushes - 1)); // PUSH [reg] 4613 code_orflag(cdb.last(),seg); 4614 cdb.genadjesp(pushsize); 4615 } 4616 cdb.gen2(0xFF,buildModregrm(0, 6, rm)); // PUSH [reg] 4617 cdb.last().Iflags |= seg; 4618 cdb.genadjesp(pushsize); 4619 } 4620 else if (sz) 4621 { 4622 getregs_imm(cdb, mCX | retregs); 4623 // MOV CX,sz/2 4624 movregconst(cdb, CX, npushes, 0); 4625 if (!doneoff) 4626 { // This should be done when 4627 // reg is loaded. 
Fix later 4628 // ADD reg,sz-pushsize 4629 cdb.genc2(0x81, grex | modregrmx(3, 0, reg), sz-pushsize); 4630 } 4631 getregs(cdb,mCX); // the LOOP decrements it 4632 cdb.gen2(0xFF, buildModregrm(0, 6, rm)); // PUSH [reg] 4633 cdb.last().Iflags |= seg | CFtarg2; 4634 code* c3 = cdb.last(); 4635 cdb.genc2(0x81,grex | buildModregrm(3, 5,reg), pushsize); // SUB reg,pushsize 4636 if (I16 || config.flags4 & CFG4space) 4637 genjmp(cdb,0xE2,FLcode,cast(block *)c3);// LOOP c3 4638 else 4639 { 4640 if (I64) 4641 cdb.gen2(0xFF, modregrm(3, 1, CX));// DEC CX 4642 else 4643 cdb.gen1(0x48 + CX); // DEC CX 4644 genjmp(cdb, JNE, FLcode, cast(block *)c3); // JNE c3 4645 } 4646 regimmed_set(CX,0); 4647 cdb.genadjesp(cast(int)sz); 4648 } 4649 stackpush += sz; 4650 freenode(e); 4651 return; 4652 } 4653 4654 case OPind: 4655 if (!e.Ecount) /* if *e1 */ 4656 { 4657 if (sz <= REGSIZE) 4658 { // Watch out for single byte quantities being up 4659 // against the end of a segment or in memory-mapped I/O 4660 if (!(config.exe & EX_flat) && szb == 1) 4661 break; 4662 goto L1; // can handle it with loadea() 4663 } 4664 4665 // Avoid PUSH MEM on the Pentium when optimizing for speed 4666 if (config.flags4 & CFG4speed && 4667 (config.target_cpu >= TARGET_80486 && 4668 config.target_cpu <= TARGET_PentiumMMX) && 4669 sz <= 2 * REGSIZE && 4670 !tyfloating(tym)) 4671 break; 4672 4673 if (tym == TYldouble || tym == TYildouble || tycomplex(tym)) 4674 break; 4675 4676 code cs; 4677 cs.Iflags = 0; 4678 cs.Irex = 0; 4679 if (I32) 4680 { 4681 assert(sz >= REGSIZE * 2); 4682 loadea(cdb, e, &cs, 0xFF, 6, sz - REGSIZE, 0, 0); // PUSH EA+4 4683 cdb.genadjesp(REGSIZE); 4684 stackpush += REGSIZE; 4685 sz -= REGSIZE; 4686 4687 if (sz > REGSIZE) 4688 { 4689 while (sz) 4690 { 4691 cs.IEV1.Voffset -= REGSIZE; 4692 cdb.gen(&cs); // PUSH EA+... 
4693 cdb.genadjesp(REGSIZE); 4694 stackpush += REGSIZE; 4695 sz -= REGSIZE; 4696 } 4697 freenode(e); 4698 return; 4699 } 4700 } 4701 else 4702 { 4703 if (sz == DOUBLESIZE) 4704 { 4705 loadea(cdb, e, &cs, 0xFF, 6, DOUBLESIZE - REGSIZE, 0, 0); // PUSH EA+6 4706 cs.IEV1.Voffset -= REGSIZE; 4707 cdb.gen(&cs); // PUSH EA+4 4708 cdb.genadjesp(REGSIZE); 4709 getlvalue_lsw(&cs); 4710 cdb.gen(&cs); // PUSH EA+2 4711 } 4712 else /* TYlong */ 4713 loadea(cdb, e, &cs, 0xFF, 6, REGSIZE, 0, 0); // PUSH EA+2 4714 cdb.genadjesp(REGSIZE); 4715 } 4716 stackpush += sz; 4717 getlvalue_lsw(&cs); 4718 cdb.gen(&cs); // PUSH EA 4719 cdb.genadjesp(REGSIZE); 4720 freenode(e); 4721 return; 4722 } 4723 break; 4724 4725 case OPnp_fp: 4726 if (!e.Ecount) /* if (far *)e1 */ 4727 { 4728 elem* e1 = e.EV.E1; 4729 tym_t tym1 = tybasic(e1.Ety); 4730 /* BUG: what about pointers to functions? */ 4731 int segreg; 4732 switch (tym1) 4733 { 4734 case TYnptr: segreg = 3<<3; break; 4735 case TYcptr: segreg = 1<<3; break; 4736 default: segreg = 2<<3; break; 4737 } 4738 if (I32 && stackalign == 2) 4739 cdb.gen1(0x66); // push a word 4740 cdb.gen1(0x06 + segreg); // PUSH SEGREG 4741 if (I32 && stackalign == 2) 4742 code_orflag(cdb.last(), CFopsize); // push a word 4743 cdb.genadjesp(stackalign); 4744 stackpush += stackalign; 4745 pushParams(cdb, e1, stackalign, tyf); 4746 freenode(e); 4747 return; 4748 } 4749 break; 4750 4751 case OPrelconst: 4752 static if (TARGET_SEGMENTED) 4753 { 4754 /* Determine if we can just push the segment register */ 4755 /* Test size of type rather than TYfptr because of (long)(&v) */ 4756 Symbol* s = e.EV.Vsym; 4757 //if (sytab[s.Sclass] & SCSS && !I32) // if variable is on stack 4758 // needframe = true; // then we need stack frame 4759 int fl; 4760 if (_tysize[tym] == tysize(TYfptr) && 4761 (fl = s.Sfl) != FLfardata && 4762 /* not a function that CS might not be the segment of */ 4763 (!((fl == FLfunc || s.ty() & mTYcs) && 4764 (s.Sclass == SCcomdat || s.Sclass == SCextern || 
s.Sclass == SCinline || config.wflags & WFthunk)) || 4765 (fl == FLfunc && config.exe == EX_DOSX) 4766 ) 4767 ) 4768 { 4769 stackpush += sz; 4770 cdb.gen1(0x06 + // PUSH SEGREG 4771 (((fl == FLfunc || s.ty() & mTYcs) ? 1 : segfl[fl]) << 3)); 4772 cdb.genadjesp(REGSIZE); 4773 4774 if (config.target_cpu >= TARGET_80286 && !e.Ecount) 4775 { 4776 getoffset(cdb, e, STACK); 4777 freenode(e); 4778 return; 4779 } 4780 else 4781 { 4782 regm_t retregs; 4783 offsetinreg(cdb, e, &retregs); 4784 const reg = findreg(retregs); 4785 genpush(cdb,reg); // PUSH reg 4786 cdb.genadjesp(REGSIZE); 4787 } 4788 return; 4789 } 4790 if (config.target_cpu >= TARGET_80286 && !e.Ecount) 4791 { 4792 stackpush += sz; 4793 if (_tysize[tym] == tysize(TYfptr)) 4794 { 4795 // PUSH SEG e 4796 cdb.gencs(0x68,0,FLextern,s); 4797 cdb.last().Iflags = CFseg; 4798 cdb.genadjesp(REGSIZE); 4799 } 4800 getoffset(cdb, e, STACK); 4801 freenode(e); 4802 return; 4803 } 4804 } 4805 break; /* else must evaluate expression */ 4806 4807 case OPvar: 4808 L1: 4809 if (config.flags4 & CFG4speed && 4810 (config.target_cpu >= TARGET_80486 && 4811 config.target_cpu <= TARGET_PentiumMMX) && 4812 sz <= 2 * REGSIZE && 4813 !tyfloating(tym)) 4814 { // Avoid PUSH MEM on the Pentium when optimizing for speed 4815 break; 4816 } 4817 else if (movOnly(e) || (tyxmmreg(tym) && config.fpxmmregs) || tyvector(tym)) 4818 break; // no PUSH MEM 4819 else 4820 { 4821 int regsize = REGSIZE; 4822 uint flag = 0; 4823 if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 && 4824 !e.Ecount) 4825 { 4826 regsize = 4; 4827 flag |= CFopsize; 4828 } 4829 code cs; 4830 cs.Iflags = 0; 4831 cs.Irex = 0; 4832 loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0); // PUSH EA+sz-2 4833 code_orflag(cdb.last(), flag); 4834 cdb.genadjesp(REGSIZE); 4835 stackpush += sz; 4836 while (cast(targ_int)(sz -= regsize) > 0) 4837 { 4838 loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0); 4839 code_orflag(cdb.last(), flag); 4840 cdb.genadjesp(REGSIZE); 4841 } 4842 
freenode(e); 4843 return; 4844 } 4845 4846 case OPconst: 4847 { 4848 char pushi = 0; 4849 uint flag = 0; 4850 int regsize = REGSIZE; 4851 4852 if (tycomplex(tym)) 4853 break; 4854 4855 if (I64 && tyfloating(tym) && sz > 4 && boolres(e)) 4856 // Can't push 64 bit non-zero args directly 4857 break; 4858 4859 if (I32 && szb == 10) // special case for long double constants 4860 { 4861 assert(sz == 12); 4862 targ_int value = e.EV.Vushort8[4]; // pick upper 2 bytes of Vldouble 4863 stackpush += sz; 4864 cdb.genadjesp(cast(int)sz); 4865 for (int i = 0; i < 3; ++i) 4866 { 4867 reg_t reg; 4868 if (reghasvalue(allregs, value, ®)) 4869 cdb.gen1(0x50 + reg); // PUSH reg 4870 else 4871 cdb.genc2(0x68,0,value); // PUSH value 4872 value = e.EV.Vulong4[i ^ 1]; // treat Vldouble as 2 element array of 32 bit uint 4873 } 4874 freenode(e); 4875 return; 4876 } 4877 4878 assert(I64 || sz <= tysize(TYldouble)); 4879 int i = cast(int)sz; 4880 if (!I16 && i == 2) 4881 flag = CFopsize; 4882 4883 if (config.target_cpu >= TARGET_80286) 4884 // && (e.Ecount == 0 || e.Ecount != e.Ecomsub)) 4885 { 4886 pushi = 1; 4887 if (I16 && config.target_cpu >= TARGET_80386 && i >= 4) 4888 { 4889 regsize = 4; 4890 flag = CFopsize; 4891 } 4892 } 4893 else if (i == REGSIZE) 4894 break; 4895 4896 stackpush += sz; 4897 cdb.genadjesp(cast(int)sz); 4898 targ_uns* pi = &e.EV.Vuns; // point to start of Vdouble 4899 targ_ushort* ps = cast(targ_ushort *) pi; 4900 targ_ullong* pl = cast(targ_ullong *)pi; 4901 i /= regsize; 4902 do 4903 { 4904 if (i) /* be careful not to go negative */ 4905 i--; 4906 4907 targ_size_t value; 4908 switch (regsize) 4909 { 4910 case 2: 4911 value = ps[i]; 4912 break; 4913 4914 case 4: 4915 if (tym == TYldouble || tym == TYildouble) 4916 /* The size is 10 bytes, and since we have 2 bytes left over, 4917 * just read those 2 bytes, not 4. 4918 * Otherwise we're reading uninitialized data. 4919 * I.e. read 4 bytes, 4 bytes, then 2 bytes 4920 */ 4921 value = i == 2 ? 
ps[4] : pi[i]; // 80 bits 4922 else 4923 value = pi[i]; 4924 break; 4925 4926 case 8: 4927 value = cast(targ_size_t)pl[i]; 4928 break; 4929 4930 default: 4931 assert(0); 4932 } 4933 4934 reg_t reg; 4935 if (pushi) 4936 { 4937 if (I64 && regsize == 8 && value != cast(int)value) 4938 { 4939 regwithvalue(cdb,allregs,value,®,64); 4940 goto Preg; // cannot push imm64 unless it is sign extended 32 bit value 4941 } 4942 if (regsize == REGSIZE && reghasvalue(allregs,value,®)) 4943 goto Preg; 4944 cdb.genc2((szb == 1) ? 0x6A : 0x68, 0, value); // PUSH value 4945 } 4946 else 4947 { 4948 regwithvalue(cdb, allregs, value, ®, 0); 4949 Preg: 4950 genpush(cdb,reg); // PUSH reg 4951 } 4952 code_orflag(cdb.last(), flag); // operand size 4953 } while (i); 4954 freenode(e); 4955 return; 4956 } 4957 4958 case OPpair: 4959 { 4960 if (e.Ecount) 4961 break; 4962 const op1 = e.EV.E1.Eoper; 4963 const op2 = e.EV.E2.Eoper; 4964 if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) && 4965 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst)) 4966 { 4967 pushParams(cdb, e.EV.E2, stackalign, tyf); 4968 pushParams(cdb, e.EV.E1, stackalign, tyf); 4969 freenode(e); 4970 } 4971 else if (tyfloating(e.EV.E1.Ety) || 4972 tyfloating(e.EV.E2.Ety)) 4973 { 4974 // Need special handling because of order of evaluation of e1 and e2 4975 break; 4976 } 4977 else 4978 { 4979 regm_t regs = allregs; 4980 codelem(cdb, e, ®s, false); 4981 genpush(cdb, findregmsw(regs)); // PUSH msreg 4982 genpush(cdb, findreglsw(regs)); // PUSH lsreg 4983 cdb.genadjesp(cast(int)sz); 4984 stackpush += sz; 4985 } 4986 return; 4987 } 4988 4989 case OPrpair: 4990 { 4991 if (e.Ecount) 4992 break; 4993 const op1 = e.EV.E1.Eoper; 4994 const op2 = e.EV.E2.Eoper; 4995 if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) && 4996 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst)) 4997 { 4998 pushParams(cdb, e.EV.E1, stackalign, tyf); 4999 pushParams(cdb, e.EV.E2, stackalign, tyf); 5000 freenode(e); 5001 } 5002 else if 
(tyfloating(e.EV.E1.Ety) || 5003 tyfloating(e.EV.E2.Ety)) 5004 { 5005 // Need special handling because of order of evaluation of e1 and e2 5006 break; 5007 } 5008 else 5009 { 5010 regm_t regs = allregs; 5011 codelem(cdb, e, ®s, false); 5012 genpush(cdb, findregmsw(regs)); // PUSH msreg 5013 genpush(cdb, findreglsw(regs)); // PUSH lsreg 5014 cdb.genadjesp(cast(int)sz); 5015 stackpush += sz; 5016 } 5017 return; 5018 } 5019 5020 default: 5021 break; 5022 } 5023 5024 regm_t retregs = tybyte(tym) ? BYTEREGS : allregs; 5025 if (tyvector(tym) || (tyxmmreg(tym) && config.fpxmmregs)) 5026 { 5027 regm_t retxmm = XMMREGS; 5028 codelem(cdb, e, &retxmm, false); 5029 stackpush += sz; 5030 cdb.genadjesp(cast(int)sz); 5031 cod3_stackadj(cdb, cast(int)sz); 5032 const op = xmmstore(tym); 5033 const r = findreg(retxmm); 5034 cdb.gen2sib(op, modregxrm(0, r - XMM0,4 ), modregrm(0, 4, SP)); // MOV [ESP],r 5035 checkSetVex(cdb.last(),tym); 5036 return; 5037 } 5038 else if (tyfloating(tym)) 5039 { 5040 if (config.inline8087) 5041 { 5042 retregs = tycomplex(tym) ? 
mST01 : mST0; 5043 codelem(cdb, e, &retregs, false); 5044 stackpush += sz; 5045 cdb.genadjesp(cast(int)sz); 5046 cod3_stackadj(cdb, cast(int)sz); 5047 opcode_t op; 5048 uint r; 5049 switch (tym) 5050 { 5051 case TYfloat: 5052 case TYifloat: 5053 case TYcfloat: 5054 op = 0xD9; 5055 r = 3; 5056 break; 5057 5058 case TYdouble: 5059 case TYidouble: 5060 case TYdouble_alias: 5061 case TYcdouble: 5062 op = 0xDD; 5063 r = 3; 5064 break; 5065 5066 case TYldouble: 5067 case TYildouble: 5068 case TYcldouble: 5069 op = 0xDB; 5070 r = 7; 5071 break; 5072 5073 default: 5074 assert(0); 5075 } 5076 if (!I16) 5077 { 5078 if (tycomplex(tym)) 5079 { 5080 // FSTP sz/2[ESP] 5081 cdb.genc1(op, (modregrm(0, 4, SP) << 8) | modregxrm(2, r, 4),FLconst, sz/2); 5082 pop87(); 5083 } 5084 pop87(); 5085 cdb.gen2sib(op, modregrm(0, r, 4),modregrm(0, 4, SP)); // FSTP [ESP] 5086 } 5087 else 5088 { 5089 retregs = IDXREGS; // get an index reg 5090 reg_t reg; 5091 allocreg(cdb, &retregs, ®, TYoffset); 5092 genregs(cdb, 0x89, SP, reg); // MOV reg,SP 5093 pop87(); 5094 cdb.gen2(op, modregrm(0, r, regtorm[reg])); // FSTP [reg] 5095 } 5096 if (LARGEDATA) 5097 cdb.last().Iflags |= CFss; // want to store into stack 5098 genfwait(cdb); // FWAIT 5099 return; 5100 } 5101 else if (I16 && (tym == TYdouble || tym == TYdouble_alias)) 5102 retregs = mSTACK; 5103 } 5104 else if (I16 && sz == 8) // if long long 5105 retregs = mSTACK; 5106 5107 scodelem(cdb,e,&retregs,0,true); 5108 if (retregs != mSTACK) // if stackpush not already inc'd 5109 stackpush += sz; 5110 if (sz <= REGSIZE) 5111 { 5112 genpush(cdb,findreg(retregs)); // PUSH reg 5113 cdb.genadjesp(cast(int)REGSIZE); 5114 } 5115 else if (sz == REGSIZE * 2) 5116 { 5117 genpush(cdb,findregmsw(retregs)); // PUSH msreg 5118 genpush(cdb,findreglsw(retregs)); // PUSH lsreg 5119 cdb.genadjesp(cast(int)sz); 5120 } 5121 } 5122 5123 /******************************* 5124 * Get offset portion of e, and store it in an index 5125 * register. 
Return mask of index register in *pretregs.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = elem whose offset portion is wanted; it is released with
 *          freenode() before returning
 *      pretregs = set to the mask of the register that holds the offset
 */

void offsetinreg(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    regm_t retregs = mLSW;                      // want only offset
    if (e.Ecount && e.Ecount != e.Ecomsub)
    {
        // e is a common subexpression: look for a register that already holds it
        regm_t rm = retregs & regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; /* possible regs */
        for (uint i = 0; rm; i++)
        {
            if (mask(i) & rm && regcon.cse.value[i] == e)
            {
                *pretregs = mask(i);
                getregs(cdb, *pretregs);
                goto L3;
            }
            rm &= ~mask(i);
        }
    }

    *pretregs = retregs;
    allocreg(cdb, pretregs, &reg, TYoffset);
    getoffset(cdb,e,reg);
L3:
    cssave(e, *pretregs,false);
    freenode(e);
}

/******************************
 * Generate code to load data into registers.
 *
 * Three major cases are handled, in this order:
 *  1. `*pretregs == mPSW`: only the flags are wanted, so the value is
 *     tested against zero rather than loaded.
 *  2. `e` is an OPconst: the constant is materialized with movregconst()
 *     (or via the float scratch slot for XMM registers).
 *  3. otherwise: the value is loaded from its effective address with loadea().
 *
 * Structs are handed to cdrelconst(); floating point values wanted in
 * ST0, complex values, and complex float/double wanted in XMM registers
 * are handed to load87()/cload87()/cloadxmm() respectively.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = elem to load, must not be null
 *      pretregs = on entry, mask of the wanted result registers and/or mPSW;
 *                 if 0, no code is generated. Passed on to fixresult()
 *                 (or the delegated loader) for the final placement.
 */


void loaddata(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    reg_t nreg;
    reg_t sreg;
    opcode_t op;
    tym_t tym;
    code cs;
    regm_t flags, forregs, regm;

    debug
    {
//        if (debugw)
//            printf("loaddata(e = %p,*pretregs = %s)\n",e,regm_str(*pretregs));
//        elem_print(e);
    }

    assert(e);
    elem_debug(e);
    if (*pretregs == 0)
        return;
    tym = tybasic(e.Ety);
    if (tym == TYstruct)
    {
        cdrelconst(cdb,e,pretregs);
        return;
    }
    if (tyfloating(tym))
    {
        objmod.fltused();
        if (config.fpxmmregs &&
            (tym == TYcfloat || tym == TYcdouble) &&
            (*pretregs & (XMMREGS | mPSW))
           )
        {
            cloadxmm(cdb, e, pretregs);
            return;
        }
        else if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                load87(cdb, e, 0, pretregs, null, -1);
                return;
            }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }
        }
    }
    int sz = _tysize[tym];
    cs.Iflags = 0;
    cs.Irex = 0;
    if (*pretregs == mPSW)
    {
        // Only the flags are wanted: test the value against 0
        Symbol *s;
        regm = allregs;
        if (e.Eoper == OPconst)
        {       /* true:        OR SP,SP        (SP is never 0)         */
                /* false:       CMP SP,SP       (always equal)          */
                genregs(cdb, (boolres(e)) ? 0x09 : 0x39 , SP, SP);
                if (I64)
                    code_orrex(cdb.last(), REX_W);
        }
        else if (e.Eoper == OPvar &&
            (s = e.EV.Vsym).Sfl == FLreg &&
            s.Sregm & XMMREGS &&
            (tym == TYfloat || tym == TYifloat || tym == TYdouble || tym ==TYidouble))
        {
            tstresult(cdb,s.Sregm,e.Ety,true);
        }
        else if (sz <= REGSIZE)
        {
            if (!I16 && (tym == TYfloat || tym == TYifloat))
            {
                allocreg(cdb, &regm, &reg, TYoffset);    // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0); // MOV reg,data
                cdb.gen2(0xD1,modregrmx(3,4,reg));       // SHL reg,1
            }
            else if (I64 && (tym == TYdouble || tym ==TYidouble))
            {
                allocreg(cdb, &regm, &reg, TYoffset);    // get a register
                loadea(cdb, e,&cs, 0x8B, reg, 0, 0, 0);  // MOV reg,data
                // remove sign bit, so that -0.0 == 0.0
                cdb.gen2(0xD1, modregrmx(3, 4, reg));    // SHL reg,1
                code_orrex(cdb.last(), REX_W);
            }
            else if (TARGET_OSX && e.Eoper == OPvar && movOnly(e))
            {
                allocreg(cdb, &regm, &reg, TYoffset);    // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0); // MOV reg,data
                fixresult(cdb, e, regm, pretregs);
            }
            else
            {   cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                op = (sz == 1) ? 0x80 : 0x81;
                loadea(cdb, e, &cs, op, 7, 0, 0, 0);     // CMP EA,0

                // Convert to TEST instruction if EA is a register
                // (to avoid register contention on Pentium)
                code *c = cdb.last();
                if ((c.Iop & ~1) == 0x38 &&
                    (c.Irm & modregrm(3, 0, 0)) == modregrm(3, 0, 0)
                   )
                {
                    c.Iop = (c.Iop & 1) | 0x84;
                    code_newreg(c, c.Irm & 7);
                    if (c.Irex & REX_B)
                        //c.Irex = (c.Irex & ~REX_B) | REX_R;
                        c.Irex |= REX_R;
                }
            }
        }
        else if (sz < 8)
        {
            allocreg(cdb, &regm, &reg, TYoffset);                // get a register
            if (I32)                                             // it's a 48 bit pointer
                loadea(cdb, e, &cs, MOVZXw, reg, REGSIZE, 0, 0); // MOVZX reg,data+4
            else
            {
                loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, 0, 0);   // MOV reg,data+2
                if (tym == TYfloat || tym == TYifloat)           // dump sign bit
                    cdb.gen2(0xD1, modregrm(3, 4, reg));         // SHL reg,1
            }
            loadea(cdb,e,&cs,0x0B,reg,0,regm,0);                 // OR reg,data
        }
        else if (sz == 8 || (I64 && sz == 2 * REGSIZE && !tyfloating(tym)))
        {
            allocreg(cdb, &regm, &reg, TYoffset);           // get a register
            int i = sz - REGSIZE;
            loadea(cdb, e, &cs, 0x8B, reg, i, 0, 0);        // MOV reg,data+6
            if (tyfloating(tym))                            // TYdouble or TYdouble_alias
                cdb.gen2(0xD1, modregrm(3, 4, reg));        // SHL reg,1

            // OR in the remaining lower pieces, highest offset first
            while ((i -= REGSIZE) >= 0)
            {
                loadea(cdb, e, &cs, 0x0B, reg, i, regm, 0); // OR reg,data+i
                code *c = cdb.last();
                if (i == 0)
                    c.Iflags |= CFpsw;                      // need the flags on last OR
            }
        }
        else if (sz == tysize(TYldouble))               // TYldouble
            load87(cdb, e, 0, pretregs, null, -1);
        else
        {
            elem_print(e);
            assert(0);
        }
        return;
    }
    /* not for flags only */
    flags = *pretregs & mPSW;             /* save original                */
    forregs = *pretregs & (mBP | ALLREGS | mES | XMMREGS);
    if (*pretregs & mSTACK)
        forregs |= DOUBLEREGS;
    if (e.Eoper == OPconst)
    {
        targ_size_t value = e.EV.Vint;
        if (sz == 8)
            value = cast(targ_size_t)e.EV.Vullong;

        // Prefer a register that already contains the constant
        if (sz == REGSIZE && reghasvalue(forregs, value, &reg))
            forregs = mask(reg);

        regm_t save = regcon.immed.mval;
        allocreg(cdb, &forregs, &reg, tym);     // allocate registers
        regcon.immed.mval = save;               // KLUDGE!
        if (sz <= REGSIZE)
        {
            if (sz == 1)
                flags |= 1;
            else if (!I16 && sz == SHORTSIZE &&
                     !(mask(reg) & regcon.mvar) &&
                     !(config.flags4 & CFG4speed)
                    )
                flags |= 2;
            if (sz == 8)
                flags |= 64;
            if (isXMMreg(reg))
            {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                 * in the data segment, because they are x87 opcodes.
                 * Not so efficient. We should at least do a PXOR for 0.
                 */
                reg_t r;
                targ_size_t unsvalue = e.EV.Vuns;
                if (sz == 8)
                    unsvalue = cast(targ_size_t)e.EV.Vullong;
                regwithvalue(cdb,ALLREGS, unsvalue,&r,flags);
                flags = 0;                          // flags are already set
                cdb.genfltreg(0x89, r, 0);          // MOV floatreg,r
                if (sz == 8)
                    code_orrex(cdb.last(), REX_W);
                assert(sz == 4 || sz == 8);         // float or double
                const opmv = xmmload(tym);
                cdb.genxmmreg(opmv, reg, 0, tym);   // MOVSS/MOVSD XMMreg,floatreg
            }
            else
            {
                movregconst(cdb, reg, value, flags);
                flags = 0;                          // flags are already set
            }
        }
        else if (sz < 8)        // far pointers, longs for 16 bit targets
        {
            targ_int msw = I32 ? e.EV.Vseg
                        : (e.EV.Vulong >> 16);
            targ_int lsw = e.EV.Voff;
            regm_t mswflags = 0;
            if (forregs & mES)
            {
                movregconst(cdb, reg, msw, 0);    // MOV reg,segment
                genregs(cdb, 0x8E, 0, reg);       // MOV ES,reg
                msw = lsw;                        // MOV reg,offset
            }
            else
            {
                sreg = findreglsw(forregs);
                movregconst(cdb, sreg, lsw, 0);
                reg = findregmsw(forregs);
                /* Decide if we need to set flags when we load msw      */
                if (flags && (msw && msw|lsw || !(msw|lsw)))
                {   mswflags = mPSW;
                    flags = 0;
                }
            }
            movregconst(cdb, reg, msw, mswflags);
        }
        else if (sz == 8)
        {
            if (I32)
            {
                targ_long *p = cast(targ_long *)cast(void*)&e.EV.Vdouble;
                if (isXMMreg(reg))
                {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                     * in the data segment, because they are x87 opcodes.
                     * Not so efficient. We should at least do a PXOR for 0.
                     */
                    reg_t r;
                    regm_t rm = ALLREGS;
                    allocreg(cdb, &rm, &r, TYint);      // allocate scratch register
                    movregconst(cdb, r, p[0], 0);
                    cdb.genfltreg(0x89, r, 0);          // MOV floatreg,r
                    movregconst(cdb, r, p[1], 0);
                    cdb.genfltreg(0x89, r, 4);          // MOV floatreg+4,r

                    const opmv = xmmload(tym);
                    cdb.genxmmreg(opmv, reg, 0, tym);   // MOVSS/MOVSD XMMreg,floatreg
                }
                else
                {
                    movregconst(cdb, findreglsw(forregs) ,p[0], 0);
                    movregconst(cdb, findregmsw(forregs) ,p[1], 0);
                }
            }
            else
            {   targ_short *p = &e.EV.Vshort;  // point to start of Vdouble

                assert(reg == AX);
                movregconst(cdb, AX, p[3], 0);   // MOV AX,p[3]
                movregconst(cdb, DX, p[0], 0);
                movregconst(cdb, CX, p[1], 0);
                movregconst(cdb, BX, p[2], 0);
            }
        }
        else if (I64 && sz == 16)
        {
            movregconst(cdb, findreglsw(forregs), cast(targ_size_t)e.EV.Vcent.lsw, 64);
            movregconst(cdb, findregmsw(forregs), cast(targ_size_t)e.EV.Vcent.msw, 64);
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
    else
    {
        // See if we can use register that parameter was passed in
        if (regcon.params &&
            regParamInPreg(e.EV.Vsym) &&
            !anyiasm &&   // may have written to the memory for the parameter
            (regcon.params & mask(e.EV.Vsym.Spreg) && e.EV.Voffset == 0 ||
             regcon.params & mask(e.EV.Vsym.Spreg2) && e.EV.Voffset == REGSIZE) &&
            sz <= REGSIZE)                  // make sure no 'paint' to a larger size happened
        {
            reg = e.EV.Voffset ? e.EV.Vsym.Spreg2 : e.EV.Vsym.Spreg;
            forregs = mask(reg);

            if (debugr)
                printf("%s.%d is fastpar and using register %s\n",
                       e.EV.Vsym.Sident.ptr,
                       cast(int)e.EV.Voffset,
                       regm_str(forregs));

            mfuncreg &= ~forregs;
            regcon.used |= forregs;
            fixresult(cdb,e,forregs,pretregs);
            return;
        }

        allocreg(cdb, &forregs, &reg, tym);            // allocate registers

        if (sz == 1)
        {   regm_t nregm;

            debug
            if (!(forregs & BYTEREGS))
            {   elem_print(e);
                printf("forregs = %s\n", regm_str(forregs));
            }

            opcode_t opmv = 0x8A;                               // byte MOV
            static if (TARGET_OSX)
            {
                if (movOnly(e))
                    opmv = 0x8B;
            }
            assert(forregs & BYTEREGS);
            if (!I16)
            {
                if (config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed &&
                    // Workaround for OSX linker bug:
                    //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                    !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
                   )
                {
//                    opmv = tyuns(tym) ? MOVZXb : MOVSXb;      // MOVZX/MOVSX
                }
                loadea(cdb, e, &cs, opmv, reg, 0, 0, 0);     // MOV regL,data
            }
            else
            {
                nregm = tyuns(tym) ? BYTEREGS : cast(regm_t) mAX;
                if (*pretregs & nregm)
                    nreg = reg;                             // already allocated
                else
                    allocreg(cdb, &nregm, &nreg, tym);
                loadea(cdb, e, &cs, opmv, nreg, 0, 0, 0);   // MOV nregL,data
                if (reg != nreg)
                {
                    genmovreg(cdb, reg, nreg);   // MOV reg,nreg
                    cssave(e, mask(nreg), false);
                }
            }
        }
        else if (forregs & XMMREGS)
        {
            // Can't load from registers directly to XMM regs
            //e.EV.Vsym.Sflags &= ~GTregcand;

            opcode_t opmv = xmmload(tym, xmmIsAligned(e));
            if (e.Eoper == OPvar)
            {
                Symbol *s = e.EV.Vsym;
                if (s.Sfl == FLreg && !(mask(s.Sreglsw) & XMMREGS))
                {   opmv = LODD;          // MOVD/MOVQ
                    /* getlvalue() will unwind this and unregister s; could use a better solution */
                }
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0); // MOVSS/MOVSD reg,data
            checkSetVex(cdb.last(),tym);
        }
        else if (sz <= REGSIZE)
        {
            opcode_t opmv = 0x8B;                     // MOV reg,data
            if (sz == 2 && !I16 && config.target_cpu >= TARGET_PentiumPro &&
                // Workaround for OSX linker bug:
                //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
               )
            {
//                opmv = tyuns(tym) ? MOVZXw : MOVSXw;  // MOVZX/MOVSX
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0);
        }
        else if (sz <= 2 * REGSIZE && forregs & mES)
        {
            loadea(cdb, e, &cs, 0xC4, reg, 0, 0, mES);    // LES data
        }
        else if (sz <= 2 * REGSIZE)
        {
            if (I32 && sz == 8 &&
                (*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                assert(0);
    /+
                /* Note that we allocreg(DOUBLEREGS) needlessly */
                stackchanged = 1;
                int i = DOUBLESIZE - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
    +/
            }

            // Load the most significant half first, then the least significant
            reg = findregmsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, forregs, 0); // MOV reg,data+2
            if (I32 && sz == REGSIZE + 2)
                cdb.last().Iflags |= CFopsize;                   // seg is 16 bits
            reg = findreglsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, 0, forregs, 0);       // MOV reg,data
        }
        else if (sz >= 8)
        {
            assert(!I32);
            if ((*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                // Note that we allocreg(DOUBLEREGS) needlessly
                stackchanged = 1;
                int i = sz - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
            }
            else
            {
                /* Each load passes, as keepmsk, every register already
                 * filled by the preceding loads so that computing the next
                 * effective address cannot clobber them.
                 */
                assert(reg == AX);
                loadea(cdb, e, &cs, 0x8B, AX, 6, 0,           0); // MOV AX,data+6
                loadea(cdb, e, &cs, 0x8B, BX, 4, mAX,         0); // MOV BX,data+4
                loadea(cdb, e, &cs, 0x8B, CX, 2, mAX|mBX,     0); // MOV CX,data+2
                loadea(cdb, e, &cs, 0x8B, DX, 0, mAX|mBX|mCX, 0); // MOV DX,data
            }
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
}

}