/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1984-1998 by Symantec
 *              Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod1.d, backend/cod1.d)
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod1.d
 */

module dmd.backend.cod1;

// The body of this module is only compiled for the SCPP and MARS builds.
version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.rtlsym;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

extern (C++):

nothrow:

int REGSIZE();

extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl;

/// Returns: a mask with only bit `m` set (`1 << m`)
private extern (D) uint mask(uint m) { return 1 << m; }

/// Forwards to `genregs` with opcode 0x09 (the x86 `OR r/m,reg` opcode),
/// passing `f` and `t` in swapped order.
private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); }

/* array to convert from index register to r/m field */
/* (-1 entries presumably mark registers that have no r/m encoding here) */
/* AX CX DX BX SP BP SI DI */
private __gshared const byte[8] regtorm32 = [ 0, 1, 2, 3,-1, 5, 6, 7 ];
__gshared const byte[8] regtorm = [ -1,-1,-1, 7,-1, 6, 4, 5 ];

targ_size_t paramsize(elem *e, tym_t tyf);
//void funccall(ref CodeBuilder cdb,elem *e,uint numpara,uint numalign,
//  regm_t *pretregs,regm_t keepmsk, bool usefuncarg);
/*********************************
 * Decide whether parameter `s` stays in the register it arrived in,
 * or whether the register allocator should pick a register for it.
 * Params:
 *      s = parameter Symbol
 * Returns:
 *      `true` if `s` is a register parameter (`SCfastpar` or `SCshadowreg`)
 *      that is to be left in its incoming register
 */
bool regParamInPreg(Symbol* s)
{
    //printf("regParamInPreg %s\n", s.Sident.ptr);
    if (s.Sclass != SCfastpar && s.Sclass != SCshadowreg)
        return false;           // not passed in a register at all

    // Only an optimized build with `s` flagged as a register candidate
    // hands the parameter over to the register allocator.
    const bool optimizing = (config.flags4 & CFG4optimized) != 0;
    const bool candidate  = (s.Sflags & GTregcand) != 0;
    return !(optimizing && candidate);
}


/**************************
 * Determine if e is a 32 bit scaled index addressing mode,
 * i.e. (after skipping comma operators) a non-CSE `x << c`
 * with constant c in 0..3.
 * Returns:
 *      0       not a scaled index addressing mode
 *      !=0     the value for ss in the SIB byte
 */

int isscaledindex(elem *e)
{
    assert(!I16);

    // Only the right-most operand of a comma chain matters
    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
    }

    if (e.Eoper != OPshl || e.Ecount || e.EV.E2.Eoper != OPconst)
        return 0;

    const targ_uns shiftcount = e.EV.E2.EV.Vuns;
    if (shiftcount > 3)
        return 0;               // SIB can only scale by 1,2,4,8
    return shiftcount;
}

/*********************************************
 * Generate code for which isscaledindex(e) returned a non-zero result.
 * Params:
 *      cdb = code is appended here
 *      e = the (possibly comma-prefixed) shift expression
 *      pidxregs = register mask passed to scodelem() for the shifted operand
 *      keepmsk = mask of registers passed to scodelem() as not to be destroyed
 */

/*private*/ void cdisscaledindex(ref CodeBuilder cdb,elem *e,regm_t *pidxregs,regm_t keepmsk)
{
    // Evaluate the left operand of every leading comma with no result registers
    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
        regm_t noresult = 0;
        scodelem(cdb, e.EV.E1, &noresult, keepmsk, true);
        freenode(e);
    }

    // Load index register with result of e.EV.E1; the shift itself
    // is folded into the addressing mode by the caller
    assert(e.Eoper == OPshl);
    scodelem(cdb, e.EV.E1, pidxregs, keepmsk, true);
    freenode(e.EV.E2);
    freenode(e);
}

/***********************************
 * Determine index if we can do two LEA instructions as a multiply.
129 * Returns: 130 * 0 can't do it 131 */ 132 133 enum 134 { 135 SSFLnobp = 1, /// can't have EBP in relconst 136 SSFLnobase1 = 2, /// no base register for first LEA 137 SSFLnobase = 4, /// no base register 138 SSFLlea = 8, /// can do it in one LEA 139 } 140 141 struct Ssindex 142 { 143 targ_uns product; 144 ubyte ss1; 145 ubyte ss2; 146 ubyte ssflags; /// SSFLxxxx 147 } 148 149 private __gshared const Ssindex[21] ssindex_array = 150 [ 151 { 0, 0, 0 }, // [0] is a place holder 152 153 { 3, 1, 0, SSFLnobp | SSFLlea }, 154 { 5, 2, 0, SSFLnobp | SSFLlea }, 155 { 9, 3, 0, SSFLnobp | SSFLlea }, 156 157 { 6, 1, 1, SSFLnobase }, 158 { 12, 1, 2, SSFLnobase }, 159 { 24, 1, 3, SSFLnobase }, 160 { 10, 2, 1, SSFLnobase }, 161 { 20, 2, 2, SSFLnobase }, 162 { 40, 2, 3, SSFLnobase }, 163 { 18, 3, 1, SSFLnobase }, 164 { 36, 3, 2, SSFLnobase }, 165 { 72, 3, 3, SSFLnobase }, 166 167 { 15, 2, 1, SSFLnobp }, 168 { 25, 2, 2, SSFLnobp }, 169 { 27, 3, 1, SSFLnobp }, 170 { 45, 3, 2, SSFLnobp }, 171 { 81, 3, 3, SSFLnobp }, 172 173 { 16, 3, 1, SSFLnobase1 | SSFLnobase }, 174 { 32, 3, 2, SSFLnobase1 | SSFLnobase }, 175 { 64, 3, 3, SSFLnobase1 | SSFLnobase }, 176 ]; 177 178 int ssindex(OPER op,targ_uns product) 179 { 180 if (op == OPshl) 181 product = 1 << product; 182 for (size_t i = 1; i < ssindex_array.length; i++) 183 { 184 if (ssindex_array[i].product == product) 185 return cast(int)i; 186 } 187 return 0; 188 } 189 190 /*************************************** 191 * Build an EA of the form disp[base][index*scale]. 
 * Input:
 *      c       struct to fill in (Irm, Isib, Irex, IFL1 and IEV1.Vuns are written)
 *      base    base register (-1 if none)
 *      index   index register (-1 if none)
 *      scale   scale factor - 1,2,4,8
 *      disp    displacement
 */

void buildEA(code *c,int base,int index,int scale,targ_size_t disp)
{
    ubyte rm;
    ubyte sib;
    ubyte rex = 0;

    sib = 0;
    if (!I16)
    {   uint ss;

        assert(index != SP);            // SP cannot be used as an index register

        // Convert the scale factor to the 2 bit ss field of the SIB byte
        switch (scale)
        {   case 1:     ss = 0; break;
            case 2:     ss = 1; break;
            case 4:     ss = 2; break;
            case 8:     ss = 3; break;
            default:    assert(0);
        }

        if (base == -1)
        {
            if (index == -1)
                rm = modregrm(0,0,5);           // displacement only
            else
            {
                // r/m == 4 selects a SIB byte; base field 5 with mod 0 means no base
                rm  = modregrm(0,0,4);
                sib = modregrm(ss,index & 7,5);
                if (index & 8)
                    rex |= REX_X;
            }
        }
        else if (index == -1)
        {
            if (base == SP)
            {
                // SP as a base needs a SIB byte (index field 4 == no index)
                rm = modregrm(2, 0, 4);
                sib = modregrm(0, 4, SP);
            }
            else
            {   rm = modregrm(2, 0, base & 7);
                if (base & 8)
                {   rex |= REX_B;
                    if (base == R12)
                    {
                        // R12 has the same low 3 bits as SP, so it needs a SIB byte too
                        rm = modregrm(2, 0, 4);
                        sib = modregrm(0, 4, 4);
                    }
                }
            }
        }
        else
        {
            // both base and index present
            rm  = modregrm(2, 0, 4);
            sib = modregrm(ss,index & 7,base & 7);
            if (index & 8)
                rex |= REX_X;
            if (base & 8)
                rex |= REX_B;
        }
    }
    else
    {
        // 16 bit r/m byte indexed by [base + 1][index + 1];
        // 0x09 marks a base/index combination that cannot be encoded
        // -1 AX CX DX BX SP BP SI DI
        static immutable ubyte[9][9] EA16rm =
        [
            [   0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85,   ],      // -1
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // AX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // CX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // DX
            [   0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81,   ],      // BX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // SP
            [   0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83,   ],      // BP
            [   0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09,   ],      // SI
            [   0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09,   ]       // DI
        ];

        assert(scale == 1);             // no scaling in 16 bit addressing
        rm = EA16rm[base + 1][index + 1];
        assert(rm != 9);
    }
    c.Irm = rm;
    c.Isib = sib;
    c.Irex = rex;
    c.IFL1 = FLconst;
    c.IEV1.Vuns = cast(targ_uns)disp;
}

/*********************************************
 * Build REX, modregrm and sib bytes
 * Params:
 *      mod = mod field
 *      reg = reg field; bit 3 selects REX_R when not I16
 *      rm = r/m field; bit 3 selects REX_B when not I16
 * Returns:
 *      modregrm byte in bits 0..7, sib byte (if any) in bits 8..15,
 *      REX bits from bit 16 upwards
 */

uint buildModregrm(int mod, int reg, int rm)
{
    uint m;
    if (I16)
        m = modregrm(mod, reg, rm);
    else
    {
        // A memory operand whose r/m low bits equal SP needs a SIB byte
        if ((rm & 7) == SP && mod != 3)
            m = (modregrm(0,4,SP) << 8) | modregrm(mod,reg & 7,4);
        else
            m = modregrm(mod,reg & 7,rm & 7);
        if (reg & 8)
            m |= REX_R << 16;
        if (rm & 8)
            m |= REX_B << 16;
    }
    return m;
}

/****************************************
 * Generate code for eecontext
 *
 * Generates code for eecontext.EEelem, terminates it with an INT 3
 * and stores the result in eecontext.EEcode.
 */

void genEEcode()
{
    CodeBuilder cdb;
    cdb.ctor();

    eecontext.EEin++;                           // decremented again on exit
    regcon.immed.mval = 0;
    regm_t retregs = 0;    //regmask(eecontext.EEelem.Ety);
    assert(EEStack.offset >= REGSIZE);
    // adjust the stack by EEStack.offset in total: all but one REGSIZE here,
    // the last REGSIZE by the PUSH below
    cod3_stackadj(cdb, cast(int)(EEStack.offset - REGSIZE));
    cdb.gen1(0x50 + SI);                      // PUSH ESI
    cdb.genadjesp(cast(int)EEStack.offset);
    gencodelem(cdb, eecontext.EEelem, &retregs, false);
    code *c = cdb.finish();
    assignaddrc(c);
    pinholeopt(c,null);
    jmpaddr(c);
    eecontext.EEcode = gen1(c, 0xCC);        // INT 3
    eecontext.EEin--;
}


/********************************************
 * Gen a save/restore sequence for mask of registers.
339 * Params: 340 * regm = mask of registers to save 341 * cdbsave = save code appended here 342 * cdbrestore = restore code appended here 343 * Returns: 344 * amount of stack consumed 345 */ 346 347 uint gensaverestore(regm_t regm,ref CodeBuilder cdbsave,ref CodeBuilder cdbrestore) 348 { 349 //printf("gensaverestore2(%s)\n", regm_str(regm)); 350 regm &= mBP | mES | ALLREGS | XMMREGS | mST0 | mST01; 351 if (!regm) 352 return 0; 353 354 uint stackused = 0; 355 356 code *[regm.sizeof * 8] restore; 357 358 reg_t i; 359 for (i = 0; regm; i++) 360 { 361 if (regm & 1) 362 { 363 code *cs2; 364 if (i == ES && I16) 365 { 366 stackused += REGSIZE; 367 cdbsave.gen1(0x06); // PUSH ES 368 cs2 = gen1(null, 0x07); // POP ES 369 } 370 else if (i == ST0 || i == ST01) 371 { 372 CodeBuilder cdb; 373 cdb.ctor(); 374 gensaverestore87(1 << i, cdbsave, cdb); 375 cs2 = cdb.finish(); 376 } 377 else if (i >= XMM0 || I64 || cgstate.funcarg.size) 378 { uint idx; 379 regsave.save(cdbsave, i, &idx); 380 CodeBuilder cdb; 381 cdb.ctor(); 382 regsave.restore(cdb, i, idx); 383 cs2 = cdb.finish(); 384 } 385 else 386 { 387 stackused += REGSIZE; 388 cdbsave.gen1(0x50 + (i & 7)); // PUSH i 389 cs2 = gen1(null, 0x58 + (i & 7)); // POP i 390 if (i & 8) 391 { code_orrex(cdbsave.last(), REX_B); 392 code_orrex(cs2, REX_B); 393 } 394 } 395 restore[i] = cs2; 396 } 397 else 398 restore[i] = null; 399 regm >>= 1; 400 } 401 402 while (i) 403 { 404 code *c = restore[--i]; 405 if (c) 406 { 407 cdbrestore.append(c); 408 } 409 } 410 411 return stackused; 412 } 413 414 415 /**************************************** 416 * Clean parameters off stack. 
417 * Input: 418 * numpara amount to adjust stack pointer 419 * keepmsk mask of registers to not destroy 420 */ 421 422 void genstackclean(ref CodeBuilder cdb,uint numpara,regm_t keepmsk) 423 { 424 //dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean); 425 if (numpara && (cgstate.stackclean || STACKALIGN >= 16)) 426 { 427 /+ 428 if (0 && // won't work if operand of scodelem 429 numpara == stackpush && // if this is all those pushed 430 needframe && // and there will be a BP 431 !config.windows && 432 !(regcon.mvar & fregsaved) // and no registers will be pushed 433 ) 434 genregs(cdb,0x89,BP,SP); // MOV SP,BP 435 else 436 +/ 437 { 438 regm_t scratchm = 0; 439 440 if (numpara == REGSIZE && config.flags4 & CFG4space) 441 { 442 scratchm = ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar; 443 } 444 445 if (scratchm) 446 { 447 reg_t r; 448 allocreg(cdb, &scratchm, &r, TYint); 449 cdb.gen1(0x58 + r); // POP r 450 } 451 else 452 cod3_stackadj(cdb, -numpara); 453 } 454 stackpush -= numpara; 455 cdb.genadjesp(-numpara); 456 } 457 } 458 459 /********************************* 460 * Generate code for a logical expression. 
461 * Input: 462 * e elem 463 * jcond 464 * bit 1 if true then goto jump address if e 465 * if false then goto jump address if !e 466 * 2 don't call save87() 467 * fltarg FLcode or FLblock, flavor of target if e evaluates to jcond 468 * targ either code or block pointer to destination 469 */ 470 471 void logexp(ref CodeBuilder cdb, elem *e, int jcond, uint fltarg, code *targ) 472 { 473 //printf("logexp(e = %p, jcond = %d)\n", e, jcond); 474 int no87 = (jcond & 2) == 0; 475 docommas(cdb, &e); // scan down commas 476 cgstate.stackclean++; 477 478 code* c, ce; 479 if (!OTleaf(e.Eoper) && !e.Ecount) // if operator and not common sub 480 { 481 switch (e.Eoper) 482 { 483 case OPoror: 484 { 485 con_t regconsave; 486 if (jcond & 1) 487 { 488 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 489 regconsave = regcon; 490 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 491 } 492 else 493 { 494 code *cnop = gennop(null); 495 logexp(cdb, e.EV.E1, jcond | 1, FLcode, cnop); 496 regconsave = regcon; 497 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 498 cdb.append(cnop); 499 } 500 andregcon(®consave); 501 freenode(e); 502 cgstate.stackclean--; 503 return; 504 } 505 506 case OPandand: 507 { 508 con_t regconsave; 509 if (jcond & 1) 510 { 511 code *cnop = gennop(null); // a dummy target address 512 logexp(cdb, e.EV.E1, jcond & ~1, FLcode, cnop); 513 regconsave = regcon; 514 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 515 cdb.append(cnop); 516 } 517 else 518 { 519 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 520 regconsave = regcon; 521 logexp(cdb, e.EV.E2, jcond, fltarg, targ); 522 } 523 andregcon(®consave); 524 freenode(e); 525 cgstate.stackclean--; 526 return; 527 } 528 529 case OPnot: 530 jcond ^= 1; 531 goto case OPbool; 532 533 case OPbool: 534 case OPs8_16: 535 case OPu8_16: 536 case OPs16_32: 537 case OPu16_32: 538 case OPs32_64: 539 case OPu32_64: 540 case OPu32_d: 541 case OPd_ld: 542 logexp(cdb, e.EV.E1, jcond, fltarg, targ); 543 freenode(e); 544 cgstate.stackclean--; 545 return; 546 547 case 
OPcond: 548 { 549 code *cnop2 = gennop(null); // addresses of start of leaves 550 code *cnop = gennop(null); 551 logexp(cdb, e.EV.E1, false, FLcode, cnop2); // eval condition 552 con_t regconold = regcon; 553 logexp(cdb, e.EV.E2.EV.E1, jcond, fltarg, targ); 554 genjmp(cdb, JMP, FLcode, cast(block *) cnop); // skip second leaf 555 556 con_t regconsave = regcon; 557 regcon = regconold; 558 559 cdb.append(cnop2); 560 logexp(cdb, e.EV.E2.EV.E2, jcond, fltarg, targ); 561 andregcon(®conold); 562 andregcon(®consave); 563 freenode(e.EV.E2); 564 freenode(e); 565 cdb.append(cnop); 566 cgstate.stackclean--; 567 return; 568 } 569 570 default: 571 break; 572 } 573 } 574 575 /* Special code for signed long compare. 576 * Not necessary for I64 until we do cents. 577 */ 578 if (OTrel2(e.Eoper) && // if < <= >= > 579 !e.Ecount && 580 ( (I16 && tybasic(e.EV.E1.Ety) == TYlong && tybasic(e.EV.E2.Ety) == TYlong) || 581 (I32 && tybasic(e.EV.E1.Ety) == TYllong && tybasic(e.EV.E2.Ety) == TYllong)) 582 ) 583 { 584 longcmp(cdb, e, jcond != 0, fltarg, targ); 585 cgstate.stackclean--; 586 return; 587 } 588 589 regm_t retregs = mPSW; // return result in flags 590 opcode_t op = jmpopcode(e); // get jump opcode 591 if (!(jcond & 1)) 592 op ^= 0x101; // toggle jump condition(s) 593 codelem(cdb, e, &retregs, true); // evaluate elem 594 if (no87) 595 cse_flush(cdb,no87); // flush CSE's to memory 596 genjmp(cdb, op, fltarg, cast(block *) targ); // generate jmp instruction 597 cgstate.stackclean--; 598 } 599 600 /****************************** 601 * Routine to aid in setting things up for gen(). 602 * Look for common subexpression. 603 * Can handle indirection operators, but not if they're common subs. 
 * Input:
 *      cdb    =  the generated instruction is appended here
 *      e      -> elem where we get some of the data from
 *      cs     -> partially filled code to add
 *      op     =  opcode
 *      reg    =  reg field of (mod reg r/m)
 *      offset =  data to be added to Voffset field
 *      keepmsk = mask of registers we must not destroy
 *      desmsk  = mask of registers destroyed by executing the instruction
 * Output:
 *      *cs is completed and appended to cdb (nothing is returned)
 */

void loadea(ref CodeBuilder cdb,elem *e,code *cs,uint op,uint reg,targ_size_t offset,
            regm_t keepmsk,regm_t desmsk)
{
    code* c, cg, cd;            // not referenced in this function

debug
    if (debugw)
        printf("loadea: e=%p cs=%p op=x%x reg=%s offset=%lld keepmsk=%s desmsk=%s\n",
               e, cs, op, regstring[reg], cast(ulong)offset, regm_str(keepmsk), regm_str(desmsk));
    assert(e);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.Iop = op;
    tym_t tym = e.Ety;
    int sz = tysize(tym);

    /* Determine if location we want to get is in a register. If so,      */
    /* substitute the register for the EA.                                */
    /* Note that operators don't go through this. CSE'd operators are     */
    /* picked up by comsub().                                             */
    if (e.Ecount &&                     /* if cse                       */
        e.Ecount != e.Ecomsub &&        /* and cse was generated        */
        op != LEA && op != 0xC4 &&      /* and not an LEA or LES        */
        (op != 0xFF || reg != 3) &&     /* and not CALLF MEM16          */
        (op & 0xFFF8) != 0xD8)          // and not 8087 opcode
    {
        assert(OTleaf(e.Eoper));                /* can't handle this            */
        regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs
        if (op == 0xFF && reg == 6)
            rm &= ~XMMREGS;             // can't PUSH an XMM register
        if (sz > REGSIZE)               // value is in 2 or 4 registers
        {
            if (I16 && sz == 8)     // value is in 4 registers
            {
                // offset 0,2,4,6 selects which of the four registers is wanted
                static immutable regm_t[4] rmask = [ mDX,mCX,mBX,mAX ];
                rm &= rmask[cast(size_t)(offset >> 1)];
            }
            else if (offset)
                rm &= mMSW;             /* only high words      */
            else
                rm &= mLSW;             /* only low words       */
        }
        // Scan the candidate registers for one that holds e
        for (uint i = 0; rm; i++)
        {
            if (mask(i) & rm)
            {
                if (regcon.cse.value[i] == e && // if register has elem
                    /* watch out for a CWD destroying DX        */
                    !(i == DX && op == 0xF7 && desmsk & mDX))
                {
                    /* if ES, then it can only be a load    */
                    if (i == ES)
                    {
                        if (op != 0x8B)
                            break;      // not a load
                        cs.Iop = 0x8C;  /* MOV reg,ES   */
                        cs.Irm = modregrm(3, 0, reg & 7);
                        if (reg & 8)
                            code_orrex(cs, REX_B);
                    }
                    else    // XXX reg,i
                    {
                        cs.Irm = modregrm(3, reg & 7, i & 7);
                        if (reg & 8)
                            cs.Irex |= REX_R;
                        if (i & 8)
                            cs.Irex |= REX_B;
                        if (sz == 1 && I64 && (i >= 4 || reg >= 4))
                            cs.Irex |= REX;
                        if (I64 && (sz == 8 || sz == 16))
                            cs.Irex |= REX_W;
                    }
                    goto L2;            // EA is a register, skip getlvalue()
                }
                rm &= ~mask(i);
            }
        }
    }

    // Not found in a register: compute the memory addressing mode
    getlvalue(cdb, cs, e, keepmsk);
    if (offset == REGSIZE)
        getlvalue_msw(cs);
    else
        cs.IEV1.Voffset += offset;
    if (I64)
    {
        if (reg >= 4 && sz == 1)               // if byte register
            // Can only address those 8 bit registers if a REX byte is present
            cs.Irex |= REX;
        if ((op & 0xFFFFFFF8) == 0xD8)
            cs.Irex &= ~REX_W;                 // not needed for x87 ops
        if (mask(reg) & XMMREGS &&
            (op == LODSD || op == STOSD))
            cs.Irex &= ~REX_W;                 // not needed for xmm ops
    }
    code_newreg(cs, reg);                         // OR in reg field
    if (!I16)
    {
        if (reg == 6 && op == 0xFF ||             /* don't PUSH a word    */
            op == MOVZXw || op == MOVSXw ||       /* MOVZX/MOVSX          */
            (op & 0xFFF8) == 0xD8 ||              /* 8087 instructions    */
            op == LEA)                            /* LEA                  */
        {
            cs.Iflags &= ~CFopsize;
            if (reg == 6 && op == 0xFF)         // if PUSH
                cs.Irex &= ~REX_W;              // REX is ignored for PUSH anyway
        }
    }
    else if ((op & 0xFFF8) == 0xD8 && ADDFWAIT())
        cs.Iflags |= CFwait;
L2:
    getregs(cdb, desmsk);                  // save any regs we destroy

    /* KLUDGE!  fix up DX for divide instructions */
    if (op == 0xF7 && desmsk == (mAX|mDX))        /* if we need to fix DX */
    {
        if (reg == 7)                           /* if IDIV              */
        {
            cdb.gen1(0x99);                     // CWD
            if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (reg == 6)                      // if DIV
            genregs(cdb, 0x33, DX, DX);        // XOR DX,DX
    }

    // Eliminate MOV reg,reg
    if ((cs.Iop & ~3) == 0x88 &&
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        uint r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == reg)
            cs.Iop = NOP;
    }

    // Eliminate MOV xmmreg,xmmreg
    if ((cs.Iop & ~(LODSD ^ STOSS)) == LODSD &&    // detect LODSD, LODSS, STOSD, STOSS
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        reg_t r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == (reg - XMM0))
            cs.Iop = NOP;
    }

    cdb.gen(cs);
}


/**************************
 * Get addressing mode.
 * Params:
 *      idxregs = mask containing the index register to address through
 * Returns:
 *      modregrm byte in bits 0..7; when not I16, a sib byte (if any)
 *      in bits 8..15 and REX bits from bit 16 upwards
 */

uint getaddrmode(regm_t idxregs)
{
    uint mode;

    if (I16)
    {
        // Only BX, DI and SI are handled; anything else is a bug
        static ubyte error() { assert(0); }

        mode =  (idxregs & mBX) ? modregrm(2,0,7) :     /* [BX] */
                (idxregs & mDI) ? modregrm(2,0,5):      /* [DI] */
                (idxregs & mSI) ? modregrm(2,0,4):      /* [SI] */
                                  error();
    }
    else
    {
        const reg = findreg(idxregs & (ALLREGS | mBP));
        if (reg == R12)
            // R12 as a base is encoded with a SIB byte
            mode = (REX_B << 16) | (modregrm(0,4,4) << 8) | modregrm(2,0,4);
        else
            mode = modregrmx(2,0,reg);
    }
    return mode;
}

/**************************
 * Set the addressing mode of `c` to address through the register in `idxregs`.
 * Unpacks the result of getaddrmode() into c.Irm, c.Isib and c.Irex;
 * any previous REX_B in c.Irex is discarded first.
 */
void setaddrmode(code *c, regm_t idxregs)
{
    uint mode = getaddrmode(idxregs);
    c.Irm = mode & 0xFF;
    c.Isib = (mode >> 8) & 0xFF;
    c.Irex &= ~REX_B;
    c.Irex |= mode >> 16;
}

/**********************************************
 * Adjust the EA in `c` to refer to the most significant word.
 * For a register EA (FLreg) the r/m field is switched to the symbol's
 * Sregmsw register, otherwise REGSIZE is added to the offset.
 */

void getlvalue_msw(code *c)
{
    if (c.IFL1 == FLreg)
    {
        const regmsw = c.IEV1.Vsym.Sregmsw;
        c.Irm = (c.Irm & ~7) | (regmsw & 7);
        if (regmsw & 8)
            c.Irex |= REX_B;
        else
            c.Irex &= ~REX_B;
    }
    else
        c.IEV1.Voffset += REGSIZE;
}

/**********************************************
 * Adjust the EA in `c` to refer to the least significant word;
 * the inverse of getlvalue_msw().
 * For a register EA (FLreg) the r/m field is switched to the symbol's
 * Sreglsw register, otherwise REGSIZE is subtracted from the offset.
 */

void getlvalue_lsw(code *c)
{
    if (c.IFL1 == FLreg)
    {
        const reglsw = c.IEV1.Vsym.Sreglsw;
        c.Irm = (c.Irm & ~7) | (reglsw & 7);
        if (reglsw & 8)
            c.Irex |= REX_B;
        else
            c.Irex &= ~REX_B;
    }
    else
        c.IEV1.Voffset -= REGSIZE;
}

/******************
 * Compute addressing mode.
 * Generate & return sequence of code (if any).
 * Return in cs the info on it.
845 * Input: 846 * pcs -> where to store data about addressing mode 847 * e -> the lvalue elem 848 * keepmsk mask of registers we must not destroy or use 849 * if (keepmsk & RMstore), this will be only a store operation 850 * into the lvalue 851 * if (keepmsk & RMload), this will be a read operation only 852 */ 853 854 void getlvalue(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk) 855 { 856 uint fl, f, opsave; 857 elem* e1, e11, e12; 858 bool e1isadd, e1free; 859 reg_t reg; 860 tym_t e1ty; 861 Symbol* s; 862 863 //printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); 864 //elem_print(e); 865 assert(e); 866 elem_debug(e); 867 if (e.Eoper == OPvar || e.Eoper == OPrelconst) 868 { 869 s = e.EV.Vsym; 870 fl = s.Sfl; 871 if (tyfloating(s.ty())) 872 objmod.fltused(); 873 } 874 else 875 fl = FLoper; 876 pcs.IFL1 = cast(ubyte)fl; 877 pcs.Iflags = CFoff; /* only want offsets */ 878 pcs.Irex = 0; 879 pcs.IEV1.Voffset = 0; 880 881 tym_t ty = e.Ety; 882 uint sz = tysize(ty); 883 if (tyfloating(ty)) 884 objmod.fltused(); 885 if (I64 && (sz == 8 || sz == 16) && !tyvector(ty)) 886 pcs.Irex |= REX_W; 887 if (!I16 && sz == SHORTSIZE) 888 pcs.Iflags |= CFopsize; 889 if (ty & mTYvolatile) 890 pcs.Iflags |= CFvolatile; 891 892 switch (fl) 893 { 894 case FLoper: 895 debug 896 if (debugw) printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); 897 898 switch (e.Eoper) 899 { 900 case OPadd: // this way when we want to do LEA 901 e1 = e; 902 e1free = false; 903 e1isadd = true; 904 break; 905 906 case OPind: 907 case OPpostinc: // when doing (*p++ = ...) 908 case OPpostdec: // when doing (*p-- = ...) 
909 case OPbt: 910 case OPbtc: 911 case OPbtr: 912 case OPbts: 913 case OPvecfill: 914 e1 = e.EV.E1; 915 e1free = true; 916 e1isadd = e1.Eoper == OPadd; 917 break; 918 919 default: 920 printf("function: %s\n", funcsym_p.Sident.ptr); 921 elem_print(e); 922 assert(0); 923 } 924 e1ty = tybasic(e1.Ety); 925 if (e1isadd) 926 { 927 e12 = e1.EV.E2; 928 e11 = e1.EV.E1; 929 } 930 931 /* First see if we can replace *(e+&v) with 932 * MOV idxreg,e 933 * EA = [ES:] &v+idxreg 934 */ 935 f = FLconst; 936 937 /* Is address of `s` relative to RIP ? 938 */ 939 static bool relativeToRIP(Symbol* s) 940 { 941 if (!I64) 942 return false; 943 if (config.exe == EX_WIN64) 944 return true; 945 if (config.flags3 & CFG3pie) 946 { 947 if (s.Sfl == FLtlsdata || s.ty() & mTYthread) 948 { 949 if (s.Sclass == SCglobal || s.Sclass == SCstatic || s.Sclass == SClocstat) 950 return false; 951 } 952 return true; 953 } 954 else 955 return (config.flags3 & CFG3pic) != 0; 956 } 957 958 if (e1isadd && 959 ((e12.Eoper == OPrelconst && 960 !relativeToRIP(e12.EV.Vsym) && 961 (f = el_fl(e12)) != FLfardata 962 ) || 963 (e12.Eoper == OPconst && !I16 && !e1.Ecount && (!I64 || el_signx32(e12)))) && 964 e1.Ecount == e1.Ecomsub && 965 (!e1.Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) && 966 tysize(e11.Ety) == REGSIZE 967 ) 968 { 969 uint t; /* component of r/m field */ 970 int ss; 971 int ssi; 972 973 if (e12.Eoper == OPrelconst) 974 f = el_fl(e12); 975 /*assert(datafl[f]);*/ /* what if addr of func? */ 976 if (!I16) 977 { /* Any register can be an index register */ 978 regm_t idxregs = allregs & ~keepmsk; 979 assert(idxregs); 980 981 /* See if e1.EV.E1 can be a scaled index */ 982 ss = isscaledindex(e11); 983 if (ss) 984 { 985 /* Load index register with result of e11.EV.E1 */ 986 cdisscaledindex(cdb, e11, &idxregs, keepmsk); 987 reg = findreg(idxregs); 988 { 989 t = stackfl[f] ? 
2 : 0; 990 pcs.Irm = modregrm(t, 0, 4); 991 pcs.Isib = modregrm(ss, reg & 7, 5); 992 if (reg & 8) 993 pcs.Irex |= REX_X; 994 } 995 } 996 else if ((e11.Eoper == OPmul || e11.Eoper == OPshl) && 997 !e11.Ecount && 998 e11.EV.E2.Eoper == OPconst && 999 (ssi = ssindex(e11.Eoper, e11.EV.E2.EV.Vuns)) != 0 1000 ) 1001 { 1002 regm_t scratchm; 1003 1004 char ssflags = ssindex_array[ssi].ssflags; 1005 if (ssflags & SSFLnobp && stackfl[f]) 1006 goto L6; 1007 1008 // Load index register with result of e11.EV.E1 1009 scodelem(cdb, e11.EV.E1, &idxregs, keepmsk, true); 1010 reg = findreg(idxregs); 1011 1012 int ss1 = ssindex_array[ssi].ss1; 1013 if (ssflags & SSFLlea) 1014 { 1015 assert(!stackfl[f]); 1016 pcs.Irm = modregrm(2,0,4); 1017 pcs.Isib = modregrm(ss1, reg & 7, reg & 7); 1018 if (reg & 8) 1019 pcs.Irex |= REX_X | REX_B; 1020 } 1021 else 1022 { 1023 int rbase; 1024 reg_t r; 1025 1026 scratchm = ALLREGS & ~keepmsk; 1027 allocreg(cdb, &scratchm, &r, TYint); 1028 1029 if (ssflags & SSFLnobase1) 1030 { 1031 t = 0; 1032 rbase = 5; 1033 } 1034 else 1035 { 1036 t = 0; 1037 rbase = reg; 1038 if (rbase == BP || rbase == R13) 1039 { 1040 static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1]; 1041 1042 // IMUL r,BP,imm32 1043 cdb.genc2(0x69, modregxrmx(3, r, rbase), imm32[ss1]); 1044 goto L7; 1045 } 1046 } 1047 1048 cdb.gen2sib(LEA, modregxrm(t, r, 4), modregrm(ss1, reg & 7 ,rbase & 7)); 1049 if (reg & 8) 1050 code_orrex(cdb.last(), REX_X); 1051 if (rbase & 8) 1052 code_orrex(cdb.last(), REX_B); 1053 if (I64) 1054 code_orrex(cdb.last(), REX_W); 1055 1056 if (ssflags & SSFLnobase1) 1057 { 1058 cdb.last().IFL1 = FLconst; 1059 cdb.last().IEV1.Vuns = 0; 1060 } 1061 L7: 1062 if (ssflags & SSFLnobase) 1063 { 1064 t = stackfl[f] ? 
2 : 0; 1065 rbase = 5; 1066 } 1067 else 1068 { 1069 t = 2; 1070 rbase = r; 1071 assert(rbase != BP); 1072 } 1073 pcs.Irm = modregrm(t, 0, 4); 1074 pcs.Isib = modregrm(ssindex_array[ssi].ss2, r & 7, rbase & 7); 1075 if (r & 8) 1076 pcs.Irex |= REX_X; 1077 if (rbase & 8) 1078 pcs.Irex |= REX_B; 1079 } 1080 freenode(e11.EV.E2); 1081 freenode(e11); 1082 } 1083 else 1084 { 1085 L6: 1086 /* Load index register with result of e11 */ 1087 scodelem(cdb, e11, &idxregs, keepmsk, true); 1088 setaddrmode(pcs, idxregs); 1089 if (stackfl[f]) /* if we need [EBP] too */ 1090 { 1091 uint idx = pcs.Irm & 7; 1092 if (pcs.Irex & REX_B) 1093 pcs.Irex = (pcs.Irex & ~REX_B) | REX_X; 1094 pcs.Isib = modregrm(0, idx, BP); 1095 pcs.Irm = modregrm(2, 0, 4); 1096 } 1097 } 1098 } 1099 else 1100 { 1101 regm_t idxregs = IDXREGS & ~keepmsk; /* only these can be index regs */ 1102 assert(idxregs); 1103 if (stackfl[f]) /* if stack data type */ 1104 { 1105 idxregs &= mSI | mDI; /* BX can't index off stack */ 1106 if (!idxregs) goto L1; /* index regs aren't avail */ 1107 t = 6; /* [BP+SI+disp] */ 1108 } 1109 else 1110 t = 0; /* [SI + disp] */ 1111 scodelem(cdb, e11, &idxregs, keepmsk, true); // load idx reg 1112 pcs.Irm = cast(ubyte)(getaddrmode(idxregs) ^ t); 1113 } 1114 if (f == FLpara) 1115 refparam = true; 1116 else if (f == FLauto || f == FLbprel || f == FLfltreg || f == FLfast) 1117 reflocal = true; 1118 else if (f == FLcsdata || tybasic(e12.Ety) == TYcptr) 1119 pcs.Iflags |= CFcs; 1120 else 1121 assert(f != FLreg); 1122 pcs.IFL1 = cast(ubyte)f; 1123 if (f != FLconst) 1124 pcs.IEV1.Vsym = e12.EV.Vsym; 1125 pcs.IEV1.Voffset = e12.EV.Voffset; /* += ??? 
*/ 1126 1127 /* If e1 is a CSE, we must generate an addressing mode */ 1128 /* but also leave EA in registers so others can use it */ 1129 if (e1.Ecount) 1130 { 1131 uint flagsave; 1132 1133 regm_t idxregs = IDXREGS & ~keepmsk; 1134 allocreg(cdb, &idxregs, ®, TYoffset); 1135 1136 /* If desired result is a far pointer, we'll have */ 1137 /* to load another register with the segment of v */ 1138 if (e1ty == TYfptr) 1139 { 1140 reg_t msreg; 1141 1142 idxregs |= mMSW & ALLREGS & ~keepmsk; 1143 allocreg(cdb, &idxregs, &msreg, TYfptr); 1144 msreg = findregmsw(idxregs); 1145 /* MOV msreg,segreg */ 1146 genregs(cdb, 0x8C, segfl[f], msreg); 1147 } 1148 opsave = pcs.Iop; 1149 flagsave = pcs.Iflags; 1150 ubyte rexsave = pcs.Irex; 1151 pcs.Iop = LEA; 1152 code_newreg(pcs, reg); 1153 if (!I16) 1154 pcs.Iflags &= ~CFopsize; 1155 if (I64) 1156 pcs.Irex |= REX_W; 1157 cdb.gen(pcs); // LEA idxreg,EA 1158 cssave(e1,idxregs,true); 1159 if (!I16) 1160 { 1161 pcs.Iflags = flagsave; 1162 pcs.Irex = rexsave; 1163 } 1164 if (stackfl[f] && (config.wflags & WFssneds)) // if pointer into stack 1165 pcs.Iflags |= CFss; // add SS: override 1166 pcs.Iop = opsave; 1167 pcs.IFL1 = FLoffset; 1168 pcs.IEV1.Vuns = 0; 1169 setaddrmode(pcs, idxregs); 1170 } 1171 freenode(e12); 1172 if (e1free) 1173 freenode(e1); 1174 goto Lptr; 1175 } 1176 1177 L1: 1178 1179 /* The rest of the cases could be a far pointer */ 1180 1181 regm_t idxregs; 1182 idxregs = (I16 ? 
IDXREGS : allregs) & ~keepmsk; // only these can be index regs 1183 assert(idxregs); 1184 if (!I16 && 1185 (sz == REGSIZE || (I64 && sz == 4)) && 1186 keepmsk & RMstore) 1187 idxregs |= regcon.mvar; 1188 1189 switch (e1ty) 1190 { 1191 case TYfptr: /* if far pointer */ 1192 case TYhptr: 1193 idxregs = (mES | IDXREGS) & ~keepmsk; // need segment too 1194 assert(idxregs & mES); 1195 pcs.Iflags |= CFes; /* ES segment override */ 1196 break; 1197 1198 case TYsptr: /* if pointer to stack */ 1199 if (config.wflags & WFssneds) // if SS != DS 1200 pcs.Iflags |= CFss; /* then need SS: override */ 1201 break; 1202 1203 case TYfgPtr: 1204 if (I32) 1205 pcs.Iflags |= CFgs; 1206 else if (I64) 1207 pcs.Iflags |= CFfs; 1208 else 1209 assert(0); 1210 break; 1211 1212 case TYcptr: /* if pointer to code */ 1213 pcs.Iflags |= CFcs; /* then need CS: override */ 1214 break; 1215 1216 default: 1217 break; 1218 } 1219 pcs.IFL1 = FLoffset; 1220 pcs.IEV1.Vuns = 0; 1221 1222 /* see if we can replace *(e+c) with 1223 * MOV idxreg,e 1224 * [MOV ES,segment] 1225 * EA = [ES:]c[idxreg] 1226 */ 1227 if (e1isadd && e12.Eoper == OPconst && 1228 (!I64 || el_signx32(e12)) && 1229 (tysize(e12.Ety) == REGSIZE || (I64 && tysize(e12.Ety) == 4)) && 1230 (!e1.Ecount || !e1free) 1231 ) 1232 { 1233 int ss; 1234 1235 pcs.IEV1.Vuns = e12.EV.Vuns; 1236 freenode(e12); 1237 if (e1free) freenode(e1); 1238 if (!I16 && e11.Eoper == OPadd && !e11.Ecount && 1239 tysize(e11.Ety) == REGSIZE) 1240 { 1241 e12 = e11.EV.E2; 1242 e11 = e11.EV.E1; 1243 e1 = e1.EV.E1; 1244 e1free = true; 1245 goto L4; 1246 } 1247 if (!I16 && (ss = isscaledindex(e11)) != 0) 1248 { // (v * scale) + const 1249 cdisscaledindex(cdb, e11, &idxregs, keepmsk); 1250 reg = findreg(idxregs); 1251 pcs.Irm = modregrm(0, 0, 4); 1252 pcs.Isib = modregrm(ss, reg & 7, 5); 1253 if (reg & 8) 1254 pcs.Irex |= REX_X; 1255 } 1256 else 1257 { 1258 scodelem(cdb, e11, &idxregs, keepmsk, true); // load index reg 1259 setaddrmode(pcs, idxregs); 1260 } 1261 goto Lptr; 
1262 } 1263 1264 /* Look for *(v1 + v2) 1265 * EA = [v1][v2] 1266 */ 1267 1268 if (!I16 && e1isadd && (!e1.Ecount || !e1free) && 1269 (_tysize[e1ty] == REGSIZE || (I64 && _tysize[e1ty] == 4))) 1270 { 1271 L4: 1272 regm_t idxregs2; 1273 uint base, index; 1274 1275 // Look for *(v1 + v2 << scale) 1276 int ss = isscaledindex(e12); 1277 if (ss) 1278 { 1279 scodelem(cdb, e11, &idxregs, keepmsk, true); 1280 idxregs2 = allregs & ~(idxregs | keepmsk); 1281 cdisscaledindex(cdb, e12, &idxregs2, keepmsk | idxregs); 1282 } 1283 1284 // Look for *(v1 << scale + v2) 1285 else if ((ss = isscaledindex(e11)) != 0) 1286 { 1287 idxregs2 = idxregs; 1288 cdisscaledindex(cdb, e11, &idxregs2, keepmsk); 1289 idxregs = allregs & ~(idxregs2 | keepmsk); 1290 scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true); 1291 } 1292 // Look for *(((v1 << scale) + c1) + v2) 1293 else if (e11.Eoper == OPadd && !e11.Ecount && 1294 e11.EV.E2.Eoper == OPconst && 1295 (ss = isscaledindex(e11.EV.E1)) != 0 1296 ) 1297 { 1298 pcs.IEV1.Vuns = e11.EV.E2.EV.Vuns; 1299 idxregs2 = idxregs; 1300 cdisscaledindex(cdb, e11.EV.E1, &idxregs2, keepmsk); 1301 idxregs = allregs & ~(idxregs2 | keepmsk); 1302 scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true); 1303 freenode(e11.EV.E2); 1304 freenode(e11); 1305 } 1306 else 1307 { 1308 scodelem(cdb, e11, &idxregs, keepmsk, true); 1309 idxregs2 = allregs & ~(idxregs | keepmsk); 1310 scodelem(cdb, e12, &idxregs2, keepmsk | idxregs, true); 1311 } 1312 base = findreg(idxregs); 1313 index = findreg(idxregs2); 1314 pcs.Irm = modregrm(2, 0, 4); 1315 pcs.Isib = modregrm(ss, index & 7, base & 7); 1316 if (index & 8) 1317 pcs.Irex |= REX_X; 1318 if (base & 8) 1319 pcs.Irex |= REX_B; 1320 if (e1free) 1321 freenode(e1); 1322 1323 goto Lptr; 1324 } 1325 1326 /* give up and replace *e1 with 1327 * MOV idxreg,e 1328 * EA = 0[idxreg] 1329 * pinholeopt() will usually correct the 0, we need it in case 1330 * we have a pointer to a long and need an offset to the second 1331 * word. 
1332 */ 1333 1334 assert(e1free); 1335 scodelem(cdb, e1, &idxregs, keepmsk, true); // load index register 1336 setaddrmode(pcs, idxregs); 1337 Lptr: 1338 if (config.flags3 & CFG3ptrchk) 1339 cod3_ptrchk(cdb, pcs, keepmsk); // validate pointer code 1340 break; 1341 1342 case FLdatseg: 1343 assert(0); 1344 static if (0) 1345 { 1346 pcs.Irm = modregrm(0, 0, BPRM); 1347 pcs.IEVpointer1 = e.EVpointer; 1348 break; 1349 } 1350 1351 case FLfltreg: 1352 reflocal = true; 1353 pcs.Irm = modregrm(2, 0, BPRM); 1354 pcs.IEV1.Vint = 0; 1355 break; 1356 1357 case FLreg: 1358 goto L2; 1359 1360 case FLpara: 1361 if (s.Sclass == SCshadowreg) 1362 goto case FLfast; 1363 Lpara: 1364 refparam = true; 1365 pcs.Irm = modregrm(2, 0, BPRM); 1366 goto L2; 1367 1368 case FLauto: 1369 case FLfast: 1370 if (regParamInPreg(s)) 1371 { 1372 regm_t pregm = s.Spregm(); 1373 /* See if the parameter is still hanging about in a register, 1374 * and so can we load from that register instead. 1375 */ 1376 if (regcon.params & pregm /*&& s.Spreg2 == NOREG && !(pregm & XMMREGS)*/) 1377 { 1378 if (keepmsk & RMload && !anyiasm) 1379 { 1380 auto voffset = e.EV.Voffset; 1381 if (sz <= REGSIZE) 1382 { 1383 const reg_t preg = (voffset >= REGSIZE) ? s.Spreg2 : s.Spreg; 1384 if (voffset >= REGSIZE) 1385 voffset -= REGSIZE; 1386 1387 /* preg could be NOREG if it's a variadic function and we're 1388 * in Win64 shadow regs and we're offsetting to get to the start 1389 * of the variadic args. 
1390 */ 1391 if (preg != NOREG && regcon.params & mask(preg)) 1392 { 1393 //printf("sz %d, preg %s, Voffset %d\n", cast(int)sz, regm_str(mask(preg)), cast(int)voffset); 1394 if (mask(preg) & XMMREGS && sz != REGSIZE) 1395 { 1396 /* The following fails with this from std.math on Linux64: 1397 void main() 1398 { 1399 alias T = float; 1400 T x = T.infinity; 1401 T e = T.infinity; 1402 int eptr; 1403 T v = frexp(x, eptr); 1404 assert(isIdentical(e, v)); 1405 } 1406 */ 1407 } 1408 else if (voffset == 0) 1409 { 1410 pcs.Irm = modregrm(3, 0, preg & 7); 1411 if (preg & 8) 1412 pcs.Irex |= REX_B; 1413 if (I64 && sz == 1 && preg >= 4) 1414 pcs.Irex |= REX; 1415 regcon.used |= mask(preg); 1416 break; 1417 } 1418 else if (voffset == 1 && sz == 1 && preg < 4) 1419 { 1420 pcs.Irm = modregrm(3, 0, 4 | preg); // use H register 1421 regcon.used |= mask(preg); 1422 break; 1423 } 1424 } 1425 } 1426 } 1427 else 1428 regcon.params &= ~pregm; 1429 } 1430 } 1431 if (s.Sclass == SCshadowreg) 1432 goto Lpara; 1433 goto case FLbprel; 1434 1435 case FLbprel: 1436 reflocal = true; 1437 pcs.Irm = modregrm(2, 0, BPRM); 1438 goto L2; 1439 1440 case FLextern: 1441 if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0) 1442 { 1443 if (config.exe & EX_windos) 1444 { 1445 if (I64) 1446 { // GS:[88] 1447 pcs.Irm = modregrm(0, 0, 4); 1448 pcs.Isib = modregrm(0, 4, 5); // don't use [RIP] addressing 1449 pcs.IFL1 = FLconst; 1450 pcs.IEV1.Vuns = 88; 1451 pcs.Iflags = CFgs; 1452 pcs.Irex |= REX_W; 1453 break; 1454 } 1455 else 1456 { 1457 pcs.Iflags |= CFfs; // add FS: override 1458 } 1459 } 1460 else if (config.exe & (EX_OSX | EX_OSX64)) 1461 { 1462 } 1463 else if (config.exe & EX_posix) 1464 assert(0); 1465 } 1466 if (s.ty() & mTYcs && cast(bool) LARGECODE) 1467 goto Lfardata; 1468 goto L3; 1469 1470 case FLtlsdata: 1471 if (config.exe & EX_posix) 1472 goto L3; 1473 assert(0); 1474 1475 case FLdata: 1476 case FLudata: 1477 case FLcsdata: 1478 case FLgot: 1479 case FLgotoff: 1480 L3: 
1481 pcs.Irm = modregrm(0, 0, BPRM); 1482 L2: 1483 if (fl == FLreg) 1484 { 1485 //printf("test: FLreg, %s %d regcon.mvar = %s\n", 1486 // s.Sident.ptr, cast(int)e.EV.Voffset, regm_str(regcon.mvar)); 1487 if (!(s.Sregm & regcon.mvar)) 1488 symbol_print(s); 1489 assert(s.Sregm & regcon.mvar); 1490 1491 /* Attempting to paint a float as an integer or an integer as a float 1492 * will cause serious problems since the EA is loaded separatedly from 1493 * the opcode. The only way to deal with this is to prevent enregistering 1494 * such variables. 1495 */ 1496 if (tyxmmreg(ty) && !(s.Sregm & XMMREGS) || 1497 !tyxmmreg(ty) && (s.Sregm & XMMREGS)) 1498 cgreg_unregister(s.Sregm); 1499 1500 if ( 1501 s.Sclass == SCregpar || 1502 s.Sclass == SCparameter) 1503 { refparam = true; 1504 reflocal = true; // kludge to set up prolog 1505 } 1506 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7); 1507 if (s.Sreglsw & 8) 1508 pcs.Irex |= REX_B; 1509 if (e.EV.Voffset == REGSIZE && sz == REGSIZE) 1510 { 1511 pcs.Irm = modregrm(3, 0, s.Sregmsw & 7); 1512 if (s.Sregmsw & 8) 1513 pcs.Irex |= REX_B; 1514 else 1515 pcs.Irex &= ~REX_B; 1516 } 1517 else if (e.EV.Voffset == 1 && sz == 1) 1518 { 1519 assert(s.Sregm & BYTEREGS); 1520 assert(s.Sreglsw < 4); 1521 pcs.Irm |= 4; // use 2nd byte of register 1522 } 1523 else 1524 { 1525 assert(!e.EV.Voffset); 1526 if (I64 && sz == 1 && s.Sreglsw >= 4) 1527 pcs.Irex |= REX; 1528 } 1529 } 1530 else if (s.ty() & mTYcs && !(fl == FLextern && LARGECODE)) 1531 { 1532 pcs.Iflags |= CFcs | CFoff; 1533 } 1534 if (config.flags3 & CFG3pic && 1535 (fl == FLtlsdata || s.ty() & mTYthread)) 1536 { 1537 if (I32) 1538 { 1539 if (config.flags3 & CFG3pie) 1540 { 1541 pcs.Iflags |= CFgs; 1542 } 1543 } 1544 else if (I64) 1545 { 1546 if (config.flags3 & CFG3pie && 1547 (s.Sclass == SCglobal || s.Sclass == SCstatic || s.Sclass == SClocstat)) 1548 { 1549 pcs.Iflags |= CFfs; 1550 pcs.Irm = modregrm(0, 0, 4); 1551 pcs.Isib = modregrm(0, 4, 5); // don't use [RIP] addressing 1552 } 1553 
else 1554 { 1555 pcs.Iflags |= CFopsize; 1556 pcs.Irex = 0x48; 1557 } 1558 } 1559 } 1560 pcs.IEV1.Vsym = s; 1561 pcs.IEV1.Voffset = e.EV.Voffset; 1562 if (sz == 1) 1563 { /* Don't use SI or DI for this variable */ 1564 s.Sflags |= GTbyte; 1565 if (I64 ? e.EV.Voffset > 0 : e.EV.Voffset > 1) 1566 { 1567 debug if (debugr) printf("'%s' not reg cand due to byte offset\n", s.Sident.ptr); 1568 s.Sflags &= ~GTregcand; 1569 } 1570 } 1571 else if (e.EV.Voffset || sz > tysize(s.Stype.Tty)) 1572 { 1573 debug if (debugr) printf("'%s' not reg cand due to offset or size\n", s.Sident.ptr); 1574 s.Sflags &= ~GTregcand; 1575 } 1576 1577 if (config.fpxmmregs && tyfloating(s.ty()) && !tyfloating(ty)) 1578 { 1579 debug if (debugr) printf("'%s' not reg cand due to mix float and int\n", s.Sident.ptr); 1580 // Can't successfully mix XMM register variables accessed as integers 1581 s.Sflags &= ~GTregcand; 1582 } 1583 1584 if (!(keepmsk & RMstore)) // if not store only 1585 s.Sflags |= SFLread; // assume we are doing a read 1586 break; 1587 1588 case FLpseudo: 1589 version (MARS) 1590 { 1591 { 1592 getregs(cdb, mask(s.Sreglsw)); 1593 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7); 1594 if (s.Sreglsw & 8) 1595 pcs.Irex |= REX_B; 1596 if (e.EV.Voffset == 1 && sz == 1) 1597 { assert(s.Sregm & BYTEREGS); 1598 assert(s.Sreglsw < 4); 1599 pcs.Irm |= 4; // use 2nd byte of register 1600 } 1601 else 1602 { assert(!e.EV.Voffset); 1603 if (I64 && sz == 1 && s.Sreglsw >= 4) 1604 pcs.Irex |= REX; 1605 } 1606 break; 1607 } 1608 } 1609 else 1610 { 1611 { 1612 uint u = s.Sreglsw; 1613 getregs(cdb, pseudomask[u]); 1614 pcs.Irm = modregrm(3, 0, pseudoreg[u] & 7); 1615 break; 1616 } 1617 } 1618 1619 case FLfardata: 1620 case FLfunc: /* reading from code seg */ 1621 if (config.exe & EX_flat) 1622 goto L3; 1623 Lfardata: 1624 { 1625 regm_t regm = ALLREGS & ~keepmsk; // need scratch register 1626 allocreg(cdb, ®m, ®, TYint); 1627 getregs(cdb,mES); 1628 // MOV mreg,seg of symbol 1629 cdb.gencs(0xB8 + reg, 0, FLextern, 
/*****************************
 * Emit the instruction described by `pcs` once for every general purpose
 * register that makes up a software floating point value, patching the
 * reg field of the ModR/M byte and the EA offset for each copy.
 * Not used for 64 bit code.
 * Params:
 *      cdb = code sink the instructions are appended to
 *      pcs = template instruction (opcode + EA); its `Irm` reg field is
 *            overwritten, `IEV1.Voffset` is restored before returning
 *      tym = either TYdouble or TYfloat
 */

void fltregs(ref CodeBuilder cdb, code* pcs, tym_t tym)
{
    assert(!I64);
    const bool single = tybasic(tym) == TYfloat;

    if (I32)
    {
        // 32 bit: float lives in EAX, double in EDX:EAX
        getregs(cdb, single ? mAX : (mAX | mDX));
        if (!single)
        {
            // upper half first, one register further into memory
            pcs.IEV1.Voffset += REGSIZE;
            NEWREG(pcs.Irm, DX);
            cdb.gen(pcs);
            pcs.IEV1.Voffset -= REGSIZE;
        }
        NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);
        return;
    }

    // 16 bit: walk from the highest word down to offset 0
    getregs(cdb, single ? FLOATREGS_16 : DOUBLEREGS_16);
    if (single)
    {
        pcs.IEV1.Voffset += 2;
        NEWREG(pcs.Irm, DX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;
        NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);
    }
    else
    {
        pcs.IEV1.Voffset += 6;
        NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;
        NEWREG(pcs.Irm, BX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;
        NEWREG(pcs.Irm, CX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;          // back to the offset we were called with
        NEWREG(pcs.Irm, DX);
        cdb.gen(pcs);
    }
}
/*****************************
 * Given a result in registers, test it for true or false.
 * Will fail if TYfptr and the reg is ES!
 * If saveflag is true, preserve the contents of the
 * registers.
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      regm = mask of the register(s) holding the value; must contain at
 *             least one of XMMREGS, mBP or ALLREGS
 *      tym = type of the value; only its tybasic() part is used
 *      saveflag = if non-zero, the registers in regm must not be modified,
 *                 so a scratch register is used where the test is destructive
 */

void tstresult(ref CodeBuilder cdb, regm_t regm, tym_t tym, uint saveflag)
{
    reg_t scrreg;                      // scratch register
    regm_t scrregm;

    //if (!(regm & (mBP | ALLREGS)))
        //printf("tstresult(regm = %s, tym = x%x, saveflag = %d)\n",
            //regm_str(regm),tym,saveflag);

    assert(regm & (XMMREGS | mBP | ALLREGS));
    tym = tybasic(tym);
    reg_t reg = findreg(regm);
    uint sz = _tysize[tym];

    // Single byte: TEST is non-destructive, so saveflag needs no handling
    if (sz == 1)
    {
        assert(regm & BYTEREGS);
        genregs(cdb, 0x84, reg, reg);        // TEST regL,regL
        if (I64 && reg >= 4)
            code_orrex(cdb.last(), REX);     // REX prefix needed to address the low byte of regs >= 4
        return;
    }

    // Value is in XMM register(s): compare against a zeroed scratch XMM register
    if (regm & XMMREGS)
    {
        reg_t xreg;
        regm_t xregs = XMMREGS & ~regm;
        allocreg(cdb,&xregs, &xreg, TYdouble);
        opcode_t op = 0;
        if (tym == TYdouble || tym == TYidouble || tym == TYcdouble)
            op = 0x660000;                   // 0x66 prefix selects the double precision forms
        cdb.gen2(op | 0x0F57, modregrm(3, xreg-XMM0, xreg-XMM0));      // XORPS xreg,xreg
        cdb.gen2(op | 0x0F2E, modregrm(3, xreg-XMM0, reg-XMM0));    // UCOMISS xreg,reg
        if (tym == TYcfloat || tym == TYcdouble)
        {   // Complex: the second register is only compared if the first
            // one compared equal and ordered; otherwise jump to cnop
            code *cnop = gennop(null);
            genjmp(cdb, JNE, FLcode, cast(block *) cnop); // JNE     L1
            genjmp(cdb, JP,  FLcode, cast(block *) cnop); // JP      L1
            reg = findreg(regm & ~mask(reg));             // the other register of the pair
            cdb.gen2(op | 0x0F2E, modregrm(3, xreg-XMM0, reg-XMM0));        // UCOMISS xreg,reg
            cdb.append(cnop);
        }
        return;
    }

    // Value fits in one general purpose register
    if (sz <= REGSIZE)
    {
        if (!I16)
        {
            if (tym == TYfloat)
            {
                // SHL is destructive, so work on a copy when the value must survive.
                // NOTE(review): presumably the shift discards the sign bit so that
                // -0.0 tests as zero - confirm.
                if (saveflag)
                {
                    scrregm = allregs & ~regm;              // possible scratch regs
                    allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
                    genmovreg(cdb, scrreg, reg);  // MOV scrreg,msreg
                    reg = scrreg;
                }
                getregs(cdb, mask(reg));
                cdb.gen2(0xD1, modregrmx(3, 4, reg)); // SHL reg,1
                return;
            }
            gentstreg(cdb,reg);                 // TEST reg,reg
            if (sz == SHORTSIZE)
                cdb.last().Iflags |= CFopsize;             // 16 bit operands
            else if (sz == 8)
                code_orrex(cdb.last(), REX_W);             // 64 bit operands
        }
        else
            gentstreg(cdb, reg);                 // TEST reg,reg
        return;
    }

    // Value is spread over several registers: OR them together.
    // This first branch accumulates into a scratch register so the
    // inputs are preserved; it is also entered via `goto L1` below.
    if (saveflag || tyfv(tym))
    {
    L1:
        scrregm = ALLREGS & ~regm;              // possible scratch regs
        allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg
        if (I32 || sz == REGSIZE * 2)
        {
            assert(regm & mMSW && regm & mLSW);

            reg = findregmsw(regm);
            if (I32)
            {
                if (tyfv(tym))
                    genregs(cdb, MOVZXw, scrreg, reg); // MOVZX scrreg,msreg
                else
                {
                    genmovreg(cdb, scrreg, reg);      // MOV scrreg,msreg
                    if (tym == TYdouble || tym == TYdouble_alias)
                        cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
                }
            }
            else
            {
                genmovreg(cdb, scrreg, reg);  // MOV scrreg,msreg
                if (tym == TYfloat)
                    cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1
            }
            reg = findreglsw(regm);
            // NOTE(review): unlike the non-saving path below, no REX_W is
            // added to this OR when I64 - confirm whether that is intended.
            genorreg(cdb, scrreg, reg);           // OR scrreg,lsreg
        }
        else if (sz == 8)
        {
            // !I32
            genmovreg(cdb, scrreg, AX);           // MOV scrreg,AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1 ,modregrm(3, 4, scrreg));         // SHL scrreg,1
            genorreg(cdb, scrreg, BX);            // OR scrreg,BX
            genorreg(cdb, scrreg, CX);            // OR scrreg,CX
            genorreg(cdb, scrreg, DX);            // OR scrreg,DX
        }
        else
            assert(0);
    }
    else
    {
        // Inputs may be destroyed: accumulate directly into the MSW register
        if (I32 || sz == REGSIZE * 2)
        {
            // can't test ES:LSW for 0
            assert(regm & mMSW & ALLREGS && regm & (mLSW | mBP));

            reg = findregmsw(regm);
            if (regcon.mvar & mask(reg))        // if register variable
                goto L1;                        // don't trash it
            getregs(cdb, mask(reg));            // we're going to trash reg
            if (tyfloating(tym) && sz == 2 * _tysize[TYint])
                cdb.gen2(0xD1, modregrm(3 ,4, reg));   // SHL reg,1
            genorreg(cdb, reg, findreglsw(regm));     // OR reg,reg+1
            if (I64)
                code_orrex(cdb.last(), REX_W);
        }
        else if (sz == 8)
        {   assert(regm == DOUBLEREGS_16);
            getregs(cdb,mAX);                  // allocate AX
            if (tym == TYdouble || tym == TYdouble_alias)
                cdb.gen2(0xD1, modregrm(3, 4, AX));       // SHL AX,1
            genorreg(cdb, AX, BX);          // OR AX,BX
            genorreg(cdb, AX, CX);          // OR AX,CX
            genorreg(cdb, AX, DX);          // OR AX,DX
        }
        else
            assert(0);
    }
    // Tag the final OR with CFpsw
    code_orflag(cdb.last(),CFpsw);
}
/******************************
 * Given the result of an expression is in retregs,
 * generate necessary code to return result in *pretregs.
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = expression whose value is in `retregs`; must not be null
 *      retregs = where the value currently is; must not be 0
 *      pretregs = on entry, the acceptable destinations (register masks
 *                 and/or mPSW for "set the flags"); if 0 nothing is done.
 *                 Written with the registers that end up holding the result.
 */

void fixresult(ref CodeBuilder cdb, elem *e, regm_t retregs, regm_t *pretregs)
{
    //printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n",e,regm_str(retregs),regm_str(*pretregs));
    if (*pretregs == 0) return;           // if don't want result
    assert(e && retregs);                 // need something to work with
    regm_t forccs = *pretregs & mPSW;     // caller wants the condition codes set
    regm_t forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK | XMMREGS);
    tym_t tym = tybasic(e.Ety);

    // Structs are moved as if they were an integer/pointer of matching size
    if (tym == TYstruct)
    {
        if (e.Eoper == OPpair || e.Eoper == OPrpair)
        {
            if (I64)
                tym = TYucent;
            else
                tym = TYullong;
        }
        else
            // Hack to support cdstreq()
            tym = (forregs & mMSW) ? TYfptr : TYnptr;
    }
    int sz = _tysize[tym];

    if (sz == 1)
    {
        assert(retregs & BYTEREGS);
        const reg = findreg(retregs);
        // Second byte of a register variable: test the H register
        // here, so the generic flag test at the end is skipped
        if (e.Eoper == OPvar &&
            e.EV.Voffset == 1 &&
            e.EV.Vsym.Sfl == FLreg)
        {
            assert(reg < 4);
            if (forccs)
                cdb.gen2(0x84, modregrm(3, reg | 4, reg | 4));   // TEST regH,regH
            forccs = 0;
        }
    }

    reg_t reg,rreg;
    if ((retregs & forregs) == retregs)   // if already in right registers
        *pretregs = retregs;
    else if (forregs)             // if return the result in registers
    {
        // Anything involving the x87 stack is handled separately
        if ((forregs | retregs) & (mST01 | mST0))
        {
            fixresult87(cdb, e, retregs, pretregs);
            return;
        }
        uint opsflag = false;
        if (I16 && sz == 8)
        {
            // 16 bit doubles live in AX:BX:CX:DX or on the stack
            if (forregs & mSTACK)
            {
                assert(retregs == DOUBLEREGS_16);
                // Push floating regs
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);
                stackpush += DOUBLESIZE;
            }
            else if (retregs & mSTACK)
            {
                assert(forregs == DOUBLEREGS_16);
                // Pop floating regs
                getregs(cdb,forregs);
                cdb.gen1(0x58 + DX);
                cdb.gen1(0x58 + CX);
                cdb.gen1(0x58 + BX);
                cdb.gen1(0x58 + AX);
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS_16;     // for tstresult() below
            }
            else
            {
                // Unsupported combination. The diagnostic is debug-only, but
                // the assert must fire in every build: previously the two were
                // joined by a comma, which made the assert debug-only as well.
                debug printf("retregs = %s, forregs = %s\n", regm_str(retregs), regm_str(forregs));
                assert(0);
            }
            if (!OTleaf(e.Eoper))
                opsflag = true;
        }
        else
        {
            allocreg(cdb, pretregs, &rreg, tym);  // allocate return regs
            if (retregs & XMMREGS)
            {
                // Source is an XMM register
                reg = findreg(retregs & XMMREGS);
                if (mask(rreg) & XMMREGS)
                    genmovreg(cdb, rreg, reg, tym);
                else
                {
                    // XMM -> general purpose register goes through the floatreg temporary
                    // MOVSD floatreg, XMM?
                    cdb.genxmmreg(xmmstore(tym), reg, 0, tym);
                    // MOV rreg,floatreg
                    cdb.genfltreg(0x8B,rreg,0);
                    if (sz == 8)
                    {
                        if (I32)
                        {
                            // upper 4 bytes go to the MSW register
                            rreg = findregmsw(*pretregs);
                            cdb.genfltreg(0x8B, rreg,4);
                        }
                        else
                            code_orrex(cdb.last(),REX_W);
                    }
                }
            }
            else if (forregs & XMMREGS)
            {
                // General purpose register(s) -> XMM register
                reg = findreg(retregs & (mBP | ALLREGS));
                switch (sz)
                {
                    case 4:
                        cdb.gen2(LODD, modregxrmx(3, rreg - XMM0, reg)); // MOVD xmm,reg
                        break;

                    case 8:
                        if (I32)
                        {
                            // store both halves to floatreg, then load 8 bytes
                            cdb.genfltreg(0x89, reg, 0);
                            reg = findregmsw(retregs);
                            cdb.genfltreg(0x89, reg, 4);
                            cdb.genxmmreg(xmmload(tym), rreg, 0, tym); // MOVQ xmm,mem
                        }
                        else
                        {
                            cdb.gen2(LODD /* [sic!] */, modregxrmx(3, rreg - XMM0, reg));
                            code_orrex(cdb.last(), REX_W); // MOVQ xmm,reg
                        }
                        break;

                    default:
                        assert(false);
                }
                checkSetVex(cdb.last(), tym);
            }
            else if (sz > REGSIZE)
            {
                // Register pair -> register pair
                uint msreg = findregmsw(retregs);
                uint lsreg = findreglsw(retregs);
                uint msrreg = findregmsw(*pretregs);
                uint lsrreg = findreglsw(*pretregs);

                genmovreg(cdb, msrreg, msreg);     // MOV msrreg,msreg
                genmovreg(cdb, lsrreg, lsreg);     // MOV lsrreg,lsreg
            }
            else
            {
                // Single general purpose register -> single register
                assert(!(retregs & XMMREGS));
                assert(!(forregs & XMMREGS));
                reg = findreg(retregs & (mBP | ALLREGS));
                if (I64 && sz <= 4)
                    genregs(cdb, 0x89, reg, rreg);  // only move 32 bits, and zero the top 32 bits
                else
                    genmovreg(cdb, rreg, reg);    // MOV rreg,reg
            }
        }
        cssave(e,retregs | *pretregs,opsflag);
        // Commented out due to Bugzilla 8840
        //forregs = 0;    // don't care about result in reg cuz real result is in rreg
        retregs = *pretregs & ~mPSW;
    }
    if (forccs)                           // if return result in flags
    {
        if (retregs & (mST01 | mST0))
        {
            *pretregs |= forccs;
            fixresult87(cdb, e, retregs, pretregs);
        }
        else
            tstresult(cdb, retregs, tym, forregs);  // non-zero forregs => keep the registers intact
    }
}
/*******************************
 * Per-function attributes of the CLIB runtime library helpers.
 * These are bit flags stored in `ClibInfo.flags`.
 */

enum
{
    INF32         = 0x01,   /// if 32 bit only
    INFfloat      = 0x02,   /// if this is floating point
    INFwkdone     = 0x04,   /// if weak extern is already done
    INF64         = 0x08,   /// if 64 bit only
    INFpushebx    = 0x10,   /// push EBX before load_localgot()
    INFpusheabcdx = 0x20,   /// pass EAX/EBX/ECX/EDX on stack, callee does ret 16
}

/*******************************
 * Calling details recorded for one CLIB runtime library function.
 */
struct ClibInfo
{
    regm_t retregs16;   /// registers that 16 bit result is returned in
    regm_t retregs32;   /// registers that 32 bit result is returned in
    ubyte pop;          /// # of bytes popped off of stack upon return
    ubyte flags;        /// combination of INFxxx bits
    byte push87;        /// # of pushes onto the 8087 stack
    byte pop87;         /// # of pops off of the 8087 stack
}

/// Non-zero once the CLIB tables have been initialized
__gshared int clib_inited = false;

/*******************************
 * Allocate the Symbol for a runtime library function.
 * Params:
 *      name = identifier of the function
 *      desregs = register mask; these registers are left out of the
 *                symbol's `Sregsaved` set
 * Returns:
 *      the new extern function Symbol, typed `tsclib`
 */
Symbol* symboly(const(char)* name, regm_t desregs)
{
    // only these registers can ever be recorded as saved
    const regm_t savable = mBP | mES | ALLREGS;

    Symbol* sym = symbol_calloc(name);
    sym.Stype = tsclib;
    sym.Sclass = SCextern;
    sym.Sfl = FLfunc;
    sym.Ssymnum = 0;
    sym.Sregsaved = savable & ~desregs;
    return sym;
}
case CLIB.lcmp: 2107 { 2108 const(char)* name = (config.exe & ex_unix) ? "__LCMP__" : "_LCMP@"; 2109 s = symboly(name, 0); 2110 } 2111 break; 2112 2113 case CLIB.lmul: 2114 { 2115 const(char)* name = (config.exe & ex_unix) ? "__LMUL__" : "_LMUL@"; 2116 s = symboly(name, mAX|mCX|mDX); 2117 cinfo.retregs16 = mDX|mAX; 2118 cinfo.retregs32 = mDX|mAX; 2119 } 2120 break; 2121 2122 case CLIB.ldiv: 2123 cinfo.retregs16 = mDX|mAX; 2124 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2125 { 2126 s = symboly("__divdi3", mAX|mBX|mCX|mDX); 2127 cinfo.flags = INFpushebx; 2128 cinfo.retregs32 = mDX|mAX; 2129 } 2130 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2131 { 2132 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2133 cinfo.flags = INFpushebx; 2134 cinfo.retregs32 = mDX|mAX; 2135 } 2136 else if (I32 && config.objfmt == OBJ_MSCOFF) 2137 { 2138 s = symboly("_alldiv", mAX|mBX|mCX|mDX); 2139 cinfo.flags = INFpusheabcdx; 2140 cinfo.retregs32 = mDX|mAX; 2141 } 2142 else 2143 { 2144 const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@"; 2145 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2146 cinfo.retregs32 = mDX|mAX; 2147 } 2148 break; 2149 2150 case CLIB.lmod: 2151 cinfo.retregs16 = mCX|mBX; 2152 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2153 { 2154 s = symboly("__moddi3", mAX|mBX|mCX|mDX); 2155 cinfo.flags = INFpushebx; 2156 cinfo.retregs32 = mDX|mAX; 2157 } 2158 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2159 { 2160 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2161 cinfo.flags = INFpushebx; 2162 cinfo.retregs32 = mCX|mBX; 2163 } 2164 else if (I32 && config.objfmt == OBJ_MSCOFF) 2165 { 2166 s = symboly("_allrem", mAX|mBX|mCX|mDX); 2167 cinfo.flags = INFpusheabcdx; 2168 cinfo.retregs32 = mAX|mDX; 2169 } 2170 else 2171 { 2172 const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@"; 2173 s = symboly(name, (config.exe & ex_unix) ? 
mAX|mBX|mCX|mDX : ALLREGS); 2174 cinfo.retregs32 = mCX|mBX; 2175 } 2176 break; 2177 2178 case CLIB.uldiv: 2179 cinfo.retregs16 = mDX|mAX; 2180 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2181 { 2182 s = symboly("__udivdi3", mAX|mBX|mCX|mDX); 2183 cinfo.flags = INFpushebx; 2184 cinfo.retregs32 = mDX|mAX; 2185 } 2186 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2187 { 2188 s = symboly("__ULDIV2__", mAX|mBX|mCX|mDX); 2189 cinfo.flags = INFpushebx; 2190 cinfo.retregs32 = mDX|mAX; 2191 } 2192 else if (I32 && config.objfmt == OBJ_MSCOFF) 2193 { 2194 s = symboly("_aulldiv", mAX|mBX|mCX|mDX); 2195 cinfo.flags = INFpusheabcdx; 2196 cinfo.retregs32 = mDX|mAX; 2197 } 2198 else 2199 { 2200 const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@"; 2201 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2202 cinfo.retregs32 = mDX|mAX; 2203 } 2204 break; 2205 2206 case CLIB.ulmod: 2207 cinfo.retregs16 = mCX|mBX; 2208 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2209 { 2210 s = symboly("__umoddi3", mAX|mBX|mCX|mDX); 2211 cinfo.flags = INFpushebx; 2212 cinfo.retregs32 = mDX|mAX; 2213 } 2214 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2215 { 2216 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2217 cinfo.flags = INFpushebx; 2218 cinfo.retregs32 = mCX|mBX; 2219 } 2220 else if (I32 && config.objfmt == OBJ_MSCOFF) 2221 { 2222 s = symboly("_aullrem", mAX|mBX|mCX|mDX); 2223 cinfo.flags = INFpusheabcdx; 2224 cinfo.retregs32 = mAX|mDX; 2225 } 2226 else 2227 { 2228 const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@"; 2229 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2230 cinfo.retregs32 = mCX|mBX; 2231 } 2232 break; 2233 2234 // This section is only for Windows and DOS (i.e. 
machines without the x87 FPU) 2235 case CLIB.dmul: 2236 s = symboly("_DMUL@",mAX|mBX|mCX|mDX); 2237 cinfo.retregs16 = DOUBLEREGS_16; 2238 cinfo.retregs32 = DOUBLEREGS_32; 2239 cinfo.pop = 8; 2240 cinfo.flags = INFfloat; 2241 cinfo.push87 = 1; 2242 cinfo.pop87 = 1; 2243 break; 2244 2245 case CLIB.ddiv: 2246 s = symboly("_DDIV@",mAX|mBX|mCX|mDX); 2247 cinfo.retregs16 = DOUBLEREGS_16; 2248 cinfo.retregs32 = DOUBLEREGS_32; 2249 cinfo.pop = 8; 2250 cinfo.flags = INFfloat; 2251 cinfo.push87 = 1; 2252 cinfo.pop87 = 1; 2253 break; 2254 2255 case CLIB.dtst0: 2256 s = symboly("_DTST0@",0); 2257 cinfo.flags = INFfloat; 2258 break; 2259 2260 case CLIB.dtst0exc: 2261 s = symboly("_DTST0EXC@",0); 2262 cinfo.flags = INFfloat; 2263 break; 2264 2265 case CLIB.dcmp: 2266 s = symboly("_DCMP@",0); 2267 cinfo.pop = 8; 2268 cinfo.flags = INFfloat; 2269 cinfo.push87 = 1; 2270 cinfo.pop87 = 1; 2271 break; 2272 2273 case CLIB.dcmpexc: 2274 s = symboly("_DCMPEXC@",0); 2275 cinfo.pop = 8; 2276 cinfo.flags = INFfloat; 2277 cinfo.push87 = 1; 2278 cinfo.pop87 = 1; 2279 break; 2280 2281 case CLIB.dneg: 2282 s = symboly("_DNEG@",I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2283 cinfo.retregs16 = DOUBLEREGS_16; 2284 cinfo.retregs32 = DOUBLEREGS_32; 2285 cinfo.flags = INFfloat; 2286 break; 2287 2288 case CLIB.dadd: 2289 s = symboly("_DADD@",mAX|mBX|mCX|mDX); 2290 cinfo.retregs16 = DOUBLEREGS_16; 2291 cinfo.retregs32 = DOUBLEREGS_32; 2292 cinfo.pop = 8; 2293 cinfo.flags = INFfloat; 2294 cinfo.push87 = 1; 2295 cinfo.pop87 = 1; 2296 break; 2297 2298 case CLIB.dsub: 2299 s = symboly("_DSUB@",mAX|mBX|mCX|mDX); 2300 cinfo.retregs16 = DOUBLEREGS_16; 2301 cinfo.retregs32 = DOUBLEREGS_32; 2302 cinfo.pop = 8; 2303 cinfo.flags = INFfloat; 2304 cinfo.push87 = 1; 2305 cinfo.pop87 = 1; 2306 break; 2307 2308 case CLIB.fmul: 2309 s = symboly("_FMUL@",mAX|mBX|mCX|mDX); 2310 cinfo.retregs16 = FLOATREGS_16; 2311 cinfo.retregs32 = FLOATREGS_32; 2312 cinfo.flags = INFfloat; 2313 cinfo.push87 = 1; 2314 cinfo.pop87 = 1; 2315 break; 2316 2317 case CLIB.fdiv: 2318 s = symboly("_FDIV@",mAX|mBX|mCX|mDX); 2319 cinfo.retregs16 = FLOATREGS_16; 2320 cinfo.retregs32 = FLOATREGS_32; 2321 cinfo.flags = INFfloat; 2322 cinfo.push87 = 1; 2323 cinfo.pop87 = 1; 2324 break; 2325 2326 case CLIB.ftst0: 2327 s = symboly("_FTST0@",0); 2328 cinfo.flags = INFfloat; 2329 break; 2330 2331 case CLIB.ftst0exc: 2332 s = symboly("_FTST0EXC@",0); 2333 cinfo.flags = INFfloat; 2334 break; 2335 2336 case CLIB.fcmp: 2337 s = symboly("_FCMP@",0); 2338 cinfo.flags = INFfloat; 2339 cinfo.push87 = 1; 2340 cinfo.pop87 = 1; 2341 break; 2342 2343 case CLIB.fcmpexc: 2344 s = symboly("_FCMPEXC@",0); 2345 cinfo.flags = INFfloat; 2346 cinfo.push87 = 1; 2347 cinfo.pop87 = 1; 2348 break; 2349 2350 case CLIB.fneg: 2351 s = symboly("_FNEG@",I16 ? 
FLOATREGS_16 : FLOATREGS_32); 2352 cinfo.retregs16 = FLOATREGS_16; 2353 cinfo.retregs32 = FLOATREGS_32; 2354 cinfo.flags = INFfloat; 2355 break; 2356 2357 case CLIB.fadd: 2358 s = symboly("_FADD@",mAX|mBX|mCX|mDX); 2359 cinfo.retregs16 = FLOATREGS_16; 2360 cinfo.retregs32 = FLOATREGS_32; 2361 cinfo.flags = INFfloat; 2362 cinfo.push87 = 1; 2363 cinfo.pop87 = 1; 2364 break; 2365 2366 case CLIB.fsub: 2367 s = symboly("_FSUB@",mAX|mBX|mCX|mDX); 2368 cinfo.retregs16 = FLOATREGS_16; 2369 cinfo.retregs32 = FLOATREGS_32; 2370 cinfo.flags = INFfloat; 2371 cinfo.push87 = 1; 2372 cinfo.pop87 = 1; 2373 break; 2374 2375 case CLIB.dbllng: 2376 { 2377 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG" : "_DBLLNG@"; 2378 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2379 cinfo.retregs16 = mDX | mAX; 2380 cinfo.retregs32 = mAX; 2381 cinfo.flags = INFfloat; 2382 cinfo.push87 = 1; 2383 cinfo.pop87 = 1; 2384 break; 2385 } 2386 2387 case CLIB.lngdbl: 2388 { 2389 const(char)* name = (config.exe & ex_unix) ? "__LNGDBL" : "_LNGDBL@"; 2390 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2391 cinfo.retregs16 = DOUBLEREGS_16; 2392 cinfo.retregs32 = DOUBLEREGS_32; 2393 cinfo.flags = INFfloat; 2394 cinfo.push87 = 1; 2395 cinfo.pop87 = 1; 2396 break; 2397 } 2398 2399 case CLIB.dblint: 2400 { 2401 const(char)* name = (config.exe & ex_unix) ? "__DBLINT" : "_DBLINT@"; 2402 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2403 cinfo.retregs16 = mAX; 2404 cinfo.retregs32 = mAX; 2405 cinfo.flags = INFfloat; 2406 cinfo.push87 = 1; 2407 cinfo.pop87 = 1; 2408 break; 2409 } 2410 2411 case CLIB.intdbl: 2412 { 2413 const(char)* name = (config.exe & ex_unix) ? "__INTDBL" : "_INTDBL@"; 2414 s = symboly(name, I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2415 cinfo.retregs16 = DOUBLEREGS_16; 2416 cinfo.retregs32 = DOUBLEREGS_32; 2417 cinfo.flags = INFfloat; 2418 cinfo.push87 = 1; 2419 cinfo.pop87 = 1; 2420 break; 2421 } 2422 2423 case CLIB.dbluns: 2424 { 2425 const(char)* name = (config.exe & ex_unix) ? "__DBLUNS" : "_DBLUNS@"; 2426 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2427 cinfo.retregs16 = mAX; 2428 cinfo.retregs32 = mAX; 2429 cinfo.flags = INFfloat; 2430 cinfo.push87 = 1; 2431 cinfo.pop87 = 1; 2432 break; 2433 } 2434 2435 case CLIB.unsdbl: 2436 // Y(DOUBLEREGS_32,"__UNSDBL"), // CLIB.unsdbl 2437 // Y(DOUBLEREGS_16,"_UNSDBL@"), 2438 // {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _UNSDBL@ unsdbl 2439 { 2440 const(char)* name = (config.exe & ex_unix) ? "__UNSDBL" : "_UNSDBL@"; 2441 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2442 cinfo.retregs16 = DOUBLEREGS_16; 2443 cinfo.retregs32 = DOUBLEREGS_32; 2444 cinfo.flags = INFfloat; 2445 cinfo.push87 = 1; 2446 cinfo.pop87 = 1; 2447 break; 2448 } 2449 2450 case CLIB.dblulng: 2451 { 2452 const(char)* name = (config.exe & ex_unix) ? "__DBLULNG" : "_DBLULNG@"; 2453 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2454 cinfo.retregs16 = mDX|mAX; 2455 cinfo.retregs32 = mAX; 2456 cinfo.flags = (config.exe & ex_unix) ? INFfloat | INF32 : INFfloat; 2457 cinfo.push87 = (config.exe & ex_unix) ? 0 : 1; 2458 cinfo.pop87 = 1; 2459 break; 2460 } 2461 2462 case CLIB.ulngdbl: 2463 { 2464 const(char)* name = (config.exe & ex_unix) ? "__ULNGDBL@" : "_ULNGDBL@"; 2465 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2466 cinfo.retregs16 = DOUBLEREGS_16; 2467 cinfo.retregs32 = DOUBLEREGS_32; 2468 cinfo.flags = INFfloat; 2469 cinfo.push87 = 1; 2470 cinfo.pop87 = 1; 2471 break; 2472 } 2473 2474 case CLIB.dblflt: 2475 { 2476 const(char)* name = (config.exe & ex_unix) ? "__DBLFLT" : "_DBLFLT@"; 2477 s = symboly(name, I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2478 cinfo.retregs16 = FLOATREGS_16; 2479 cinfo.retregs32 = FLOATREGS_32; 2480 cinfo.flags = INFfloat; 2481 cinfo.push87 = 1; 2482 cinfo.pop87 = 1; 2483 break; 2484 } 2485 2486 case CLIB.fltdbl: 2487 { 2488 const(char)* name = (config.exe & ex_unix) ? "__FLTDBL" : "_FLTDBL@"; 2489 s = symboly(name, I16 ? ALLREGS : DOUBLEREGS_32); 2490 cinfo.retregs16 = DOUBLEREGS_16; 2491 cinfo.retregs32 = DOUBLEREGS_32; 2492 cinfo.flags = INFfloat; 2493 cinfo.push87 = 1; 2494 cinfo.pop87 = 1; 2495 break; 2496 } 2497 2498 case CLIB.dblllng: 2499 { 2500 const(char)* name = (config.exe & ex_unix) ? "__DBLLLNG" : "_DBLLLNG@"; 2501 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2502 cinfo.retregs16 = DOUBLEREGS_16; 2503 cinfo.retregs32 = mDX|mAX; 2504 cinfo.flags = INFfloat; 2505 cinfo.push87 = 1; 2506 cinfo.pop87 = 1; 2507 break; 2508 } 2509 2510 case CLIB.llngdbl: 2511 { 2512 const(char)* name = (config.exe & ex_unix) ? "__LLNGDBL" : "_LLNGDBL@"; 2513 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2514 cinfo.retregs16 = DOUBLEREGS_16; 2515 cinfo.retregs32 = DOUBLEREGS_32; 2516 cinfo.flags = INFfloat; 2517 cinfo.push87 = 1; 2518 cinfo.pop87 = 1; 2519 break; 2520 } 2521 2522 case CLIB.dblullng: 2523 { 2524 if (config.exe == EX_WIN64) 2525 { 2526 s = symboly("__DBLULLNG", DOUBLEREGS_32); 2527 cinfo.retregs32 = mAX; 2528 cinfo.flags = INFfloat; 2529 cinfo.push87 = 2; 2530 cinfo.pop87 = 2; 2531 } 2532 else 2533 { 2534 const(char)* name = (config.exe & ex_unix) ? "__DBLULLNG" : "_DBLULLNG@"; 2535 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2536 cinfo.retregs16 = DOUBLEREGS_16; 2537 cinfo.retregs32 = I64 ? mAX : mDX|mAX; 2538 cinfo.flags = INFfloat; 2539 cinfo.push87 = (config.exe & ex_unix) ? 2 : 1; 2540 cinfo.pop87 = (config.exe & ex_unix) ? 
2 : 1; 2541 } 2542 break; 2543 } 2544 2545 case CLIB.ullngdbl: 2546 { 2547 if (config.exe == EX_WIN64) 2548 { 2549 s = symboly("__ULLNGDBL", DOUBLEREGS_32); 2550 cinfo.retregs32 = mAX; 2551 cinfo.flags = INFfloat; 2552 cinfo.push87 = 1; 2553 cinfo.pop87 = 1; 2554 } 2555 else 2556 { 2557 const(char)* name = (config.exe & ex_unix) ? "__ULLNGDBL" : "_ULLNGDBL@"; 2558 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2559 cinfo.retregs16 = DOUBLEREGS_16; 2560 cinfo.retregs32 = I64 ? mAX : DOUBLEREGS_32; 2561 cinfo.flags = INFfloat; 2562 cinfo.push87 = 1; 2563 cinfo.pop87 = 1; 2564 } 2565 break; 2566 } 2567 2568 case CLIB.dtst: 2569 { 2570 const(char)* name = (config.exe & ex_unix) ? "__DTST" : "_DTST@"; 2571 s = symboly(name, 0); 2572 cinfo.flags = INFfloat; 2573 break; 2574 } 2575 2576 case CLIB.vptrfptr: 2577 { 2578 const(char)* name = (config.exe & ex_unix) ? "__HTOFPTR" : "_HTOFPTR@"; 2579 s = symboly(name, mES|mBX); 2580 cinfo.retregs16 = mES|mBX; 2581 cinfo.retregs32 = mES|mBX; 2582 break; 2583 } 2584 2585 case CLIB.cvptrfptr: 2586 { 2587 const(char)* name = (config.exe & ex_unix) ? "__HCTOFPTR" : "_HCTOFPTR@"; 2588 s = symboly(name, mES|mBX); 2589 cinfo.retregs16 = mES|mBX; 2590 cinfo.retregs32 = mES|mBX; 2591 break; 2592 } 2593 2594 case CLIB._87topsw: 2595 { 2596 const(char)* name = (config.exe & ex_unix) ? "__87TOPSW" : "_87TOPSW@"; 2597 s = symboly(name, 0); 2598 cinfo.flags = INFfloat; 2599 break; 2600 } 2601 2602 case CLIB.fltto87: 2603 { 2604 const(char)* name = (config.exe & ex_unix) ? "__FLTTO87" : "_FLTTO87@"; 2605 s = symboly(name, mST0); 2606 cinfo.retregs16 = mST0; 2607 cinfo.retregs32 = mST0; 2608 cinfo.flags = INFfloat; 2609 cinfo.push87 = 1; 2610 break; 2611 } 2612 2613 case CLIB.dblto87: 2614 { 2615 const(char)* name = (config.exe & ex_unix) ? 
"__DBLTO87" : "_DBLTO87@"; 2616 s = symboly(name, mST0); 2617 cinfo.retregs16 = mST0; 2618 cinfo.retregs32 = mST0; 2619 cinfo.flags = INFfloat; 2620 cinfo.push87 = 1; 2621 break; 2622 } 2623 2624 case CLIB.dblint87: 2625 { 2626 const(char)* name = (config.exe & ex_unix) ? "__DBLINT87" : "_DBLINT87@"; 2627 s = symboly(name, mST0|mAX); 2628 cinfo.retregs16 = mAX; 2629 cinfo.retregs32 = mAX; 2630 cinfo.flags = INFfloat; 2631 break; 2632 } 2633 2634 case CLIB.dbllng87: 2635 { 2636 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG87" : "_DBLLNG87@"; 2637 s = symboly(name, mST0|mAX|mDX); 2638 cinfo.retregs16 = mDX|mAX; 2639 cinfo.retregs32 = mAX; 2640 cinfo.flags = INFfloat; 2641 break; 2642 } 2643 2644 case CLIB.ftst: 2645 { 2646 const(char)* name = (config.exe & ex_unix) ? "__FTST" : "_FTST@"; 2647 s = symboly(name, 0); 2648 cinfo.flags = INFfloat; 2649 break; 2650 } 2651 2652 case CLIB.fcompp: 2653 { 2654 const(char)* name = (config.exe & ex_unix) ? "__FCOMPP" : "_FCOMPP@"; 2655 s = symboly(name, 0); 2656 cinfo.retregs16 = mPSW; 2657 cinfo.retregs32 = mPSW; 2658 cinfo.flags = INFfloat; 2659 cinfo.pop87 = 2; 2660 break; 2661 } 2662 2663 case CLIB.ftest: 2664 { 2665 const(char)* name = (config.exe & ex_unix) ? "__FTEST" : "_FTEST@"; 2666 s = symboly(name, 0); 2667 cinfo.retregs16 = mPSW; 2668 cinfo.retregs32 = mPSW; 2669 cinfo.flags = INFfloat; 2670 break; 2671 } 2672 2673 case CLIB.ftest0: 2674 { 2675 const(char)* name = (config.exe & ex_unix) ? "__FTEST0" : "_FTEST0@"; 2676 s = symboly(name, 0); 2677 cinfo.retregs16 = mPSW; 2678 cinfo.retregs32 = mPSW; 2679 cinfo.flags = INFfloat; 2680 break; 2681 } 2682 2683 case CLIB.fdiv87: 2684 { 2685 const(char)* name = (config.exe & ex_unix) ? 
"__FDIVP" : "_FDIVP"; 2686 s = symboly(name, mST0|mAX|mBX|mCX|mDX); 2687 cinfo.retregs16 = mST0; 2688 cinfo.retregs32 = mST0; 2689 cinfo.flags = INFfloat; 2690 cinfo.push87 = 1; 2691 cinfo.pop87 = 1; 2692 break; 2693 } 2694 2695 // Complex numbers 2696 case CLIB.cmul: 2697 { 2698 s = symboly("_Cmul", mST0|mST01); 2699 cinfo.retregs16 = mST01; 2700 cinfo.retregs32 = mST01; 2701 cinfo.flags = INF32|INFfloat; 2702 cinfo.push87 = 3; 2703 cinfo.pop87 = 5; 2704 break; 2705 } 2706 2707 case CLIB.cdiv: 2708 { 2709 s = symboly("_Cdiv", mAX|mCX|mDX|mST0|mST01); 2710 cinfo.retregs16 = mST01; 2711 cinfo.retregs32 = mST01; 2712 cinfo.flags = INF32|INFfloat; 2713 cinfo.push87 = 0; 2714 cinfo.pop87 = 2; 2715 break; 2716 } 2717 2718 case CLIB.ccmp: 2719 { 2720 s = symboly("_Ccmp", mAX|mST0|mST01); 2721 cinfo.retregs16 = mPSW; 2722 cinfo.retregs32 = mPSW; 2723 cinfo.flags = INF32|INFfloat; 2724 cinfo.push87 = 0; 2725 cinfo.pop87 = 4; 2726 break; 2727 } 2728 2729 case CLIB.u64_ldbl: 2730 { 2731 const(char)* name = (config.exe & ex_unix) ? "__U64_LDBL" : "_U64_LDBL"; 2732 s = symboly(name, mST0); 2733 cinfo.retregs16 = mST0; 2734 cinfo.retregs32 = mST0; 2735 cinfo.flags = INF32|INF64|INFfloat; 2736 cinfo.push87 = 2; 2737 cinfo.pop87 = 1; 2738 break; 2739 } 2740 2741 case CLIB.ld_u64: 2742 { 2743 const(char)* name = (config.exe & ex_unix) ? (config.objfmt == OBJ_ELF || 2744 config.objfmt == OBJ_MACH ? 2745 "__LDBLULLNG" : "___LDBLULLNG") 2746 : "__LDBLULLNG"; 2747 s = symboly(name, mST0|mAX|mDX); 2748 cinfo.retregs16 = 0; 2749 cinfo.retregs32 = mDX|mAX; 2750 cinfo.flags = INF32|INF64|INFfloat; 2751 cinfo.push87 = 1; 2752 cinfo.pop87 = 2; 2753 break; 2754 } 2755 2756 default: 2757 assert(0); 2758 } 2759 clibsyms[clib] = s; 2760 } 2761 2762 *ps = s; 2763 *pinfo = cinfo; 2764 } 2765 2766 /******************************** 2767 * Generate code sequence to call C runtime library support routine. 2768 * clib = CLIB.xxxx 2769 * keepmask = mask of registers not to destroy. 
Currently can
 *              handle only 1. Should use a temporary rather than
 *              push/pop for speed.
 */

void callclib(ref CodeBuilder cdb, elem* e, uint clib, regm_t* pretregs, regm_t keepmask)
{
    //printf("callclib(e = %p, clib = %d, *pretregs = %s, keepmask = %s\n", e, clib, regm_str(*pretregs), regm_str(keepmask));
    //elem_print(e);

    // Look up the runtime library symbol and its calling information
    Symbol* s;
    ClibInfo* cinfo;
    getClibInfo(clib, &s, &cinfo);

    if (I16)
        assert(!(cinfo.flags & (INF32 | INF64)));
    getregs(cdb,(~s.Sregsaved & (mES | mBP | ALLREGS)) & ~keepmask); // mask of regs destroyed

    // Only the registers in keepmask that the routine does not itself
    // preserve need an explicit save before / restore after the call
    keepmask &= ~s.Sregsaved;
    int npushed = numbitsset(keepmask);
    CodeBuilder cdbpop;
    cdbpop.ctor();
    gensaverestore(keepmask, cdb, cdbpop);

    // Account for the routine's effect on the 8087 stack
    save87regs(cdb,cinfo.push87);
    for (int i = 0; i < cinfo.push87; i++)
        push87(cdb);

    for (int i = 0; i < cinfo.pop87; i++)
        pop87();

    if (config.target_cpu >= TARGET_80386 && clib == CLIB.lmul && !I32)
    {
        // Emit the multiply inline instead of calling the library routine
        static immutable ubyte[23] lmul =
        [
            0x66,0xc1,0xe1,0x10,        // shl  ECX,16
            0x8b,0xcb,                  // mov  CX,BX           ;ECX = CX,BX
            0x66,0xc1,0xe0,0x10,        // shl  EAX,16
            0x66,0x0f,0xac,0xd0,0x10,   // shrd EAX,EDX,16      ;EAX = DX,AX
            0x66,0xf7,0xe1,             // mul  ECX
            0x66,0x0f,0xa4,0xc2,0x10,   // shld EDX,EAX,16      ;DX,AX = EAX
        ];

        cdb.genasm(cast(char*)lmul.ptr, lmul.sizeof);
    }
    else
    {
        makeitextern(s);
        int nalign = 0;         // bytes to remove from the stack after the call
        int pushebx = (cinfo.flags & INFpushebx) != 0;
        int pushall = (cinfo.flags & INFpusheabcdx) != 0;
        if (STACKALIGN >= 16)
        {   // Align the stack (assume no args on stack)
            int npush = (npushed + pushebx + 4 * pushall) * REGSIZE + stackpush;
            if (npush & (STACKALIGN - 1))
            {   nalign = STACKALIGN - (npush & (STACKALIGN - 1));
                cod3_stackadj(cdb, nalign);
            }
        }
        if (pushebx)
        {
            if (config.exe & (EX_LINUX | EX_LINUX64 | EX_FREEBSD | EX_FREEBSD64 | EX_DRAGONFLYBSD64))
            {
                cdb.gen1(0x50 + CX);                             // PUSH ECX
                cdb.gen1(0x50 + BX);                             // PUSH EBX
                cdb.gen1(0x50 + DX);                             // PUSH EDX
                cdb.gen1(0x50 + AX);                             // PUSH EAX
                nalign += 4 * REGSIZE;
            }
            else
            {
                cdb.gen1(0x50 + BX);                             // PUSH EBX
                nalign += REGSIZE;
            }
        }
        if (pushall)
        {
            cdb.gen1(0x50 + CX);                                 // PUSH ECX
            cdb.gen1(0x50 + BX);                                 // PUSH EBX
            cdb.gen1(0x50 + DX);                                 // PUSH EDX
            cdb.gen1(0x50 + AX);                                 // PUSH EAX
        }
        if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS))
        {
            // Note: not for OSX
            /* Pass EBX on the stack instead, this is because EBX is used
             * for shared library function calls
             */
            if (config.flags3 & CFG3pic)
            {
                load_localgot(cdb);     // EBX gets set to this value
            }
        }

        cdb.gencs(LARGECODE ? 0x9A : 0xE8,0,FLfunc,s);  // CALL s
        if (nalign)
            cod3_stackadj(cdb, -nalign);
        calledafunc = 1;

        version (SCPP)
        {
        if (I16 &&                                   // bug in Optlink for weak references
            config.flags3 & CFG3wkfloat &&
            (cinfo.flags & (INFfloat | INFwkdone)) == INFfloat)
        {
            cinfo.flags |= INFwkdone;
            makeitextern(getRtlsym(RTLSYM_INTONLY));
            objmod.wkext(s, getRtlsym(RTLSYM_INTONLY));
        }
        }
    }
    if (I16)
        stackpush -= cinfo.pop;
    regm_t retregs = I16 ? cinfo.retregs16 : cinfo.retregs32;
    cdb.append(cdbpop);                 // restore the registers saved above
    fixresult(cdb, e, retregs, pretregs);
}


/*************************************************
 * Helper function for converting OPparam's into array of Parameters.
*/
struct Parameter
{
    elem* e;        // the parameter expression (set by fillParameters)
    reg_t reg;      // first register assigned by FuncParamRegs_alloc, or NOREG
    reg_t reg2;     // second register assigned by FuncParamRegs_alloc, or NOREG
    uint numalign;  // alignment padding bytes computed for it in cdfunc
}

//void fillParameters(elem* e, Parameter* parameters, int* pi);

/***********************************
 * Flatten a tree of OPparam nodes into `parameters[]`, visiting E1 before E2.
 * The OPparam nodes themselves are freed; only the `e` field of each
 * entry is written.
 * Params:
 *      e = OPparam tree, or a single parameter expression
 *      parameters = array to fill in
 *      pi = in/out index of the next free slot in `parameters`
 */
void fillParameters(elem* e, Parameter* parameters, int* pi)
{
    if (e.Eoper == OPparam)
    {
        fillParameters(e.EV.E1, parameters, pi);
        fillParameters(e.EV.E2, parameters, pi);
        freenode(e);
    }
    else
    {
        parameters[*pi].e = e;
        (*pi)++;
    }
}

/***********************************
 * Set up the lists of integer and floating point argument registers
 * available for a call to a function of type `tyf` on the current target.
 * Params:
 *      tyf = type of the function
 * Returns:
 *      FuncParamRegs with `tyf` and the register lists/counts filled in
 */
FuncParamRegs FuncParamRegs_create(tym_t tyf)
{
    FuncParamRegs result;

    result.tyf = tyf;

    if (I16)
    {
        result.numintegerregs = 0;
        result.numfloatregs = 0;
    }
    else if (I32)
    {
        if (tyf == TYjfunc)
        {
            static immutable ubyte[1] reglist1 = [ AX ];
            result.argregs = &reglist1[0];
            result.numintegerregs = reglist1.length;
        }
        else if (tyf == TYmfunc)
        {
            static immutable ubyte[1] reglist2 = [ CX ];
            result.argregs = &reglist2[0];
            result.numintegerregs = reglist2.length;
        }
        else
            result.numintegerregs = 0;
        result.numfloatregs = 0;
    }
    else if (I64 && config.exe == EX_WIN64)
    {
        static immutable ubyte[4] reglist3 = [ CX,DX,R8,R9 ];
        result.argregs = &reglist3[0];
        result.numintegerregs = reglist3.length;

        static immutable ubyte[4] freglist3 = [ XMM0, XMM1, XMM2, XMM3 ];
        result.floatregs = &freglist3[0];
        result.numfloatregs = freglist3.length;
    }
    else if (I64)
    {
        static immutable ubyte[6] reglist4 = [ DI,SI,DX,CX,R8,R9 ];
        result.argregs = &reglist4[0];
        result.numintegerregs = reglist4.length;

        static immutable ubyte[8] freglist4 = [ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 ];
        result.floatregs = &freglist4[0];
        result.numfloatregs = freglist4.length;
    }
    else
        assert(0);
    return result;
}

/*****************************************
 * Determine if a parameter of type `t`/`ty` is a candidate for being
 * passed in a general purpose register.
 * Params:
 *      t = type, valid only if ty is TYstruct or TYarray
 *      ty = type of the parameter
 * Returns:
 *      false   floating point type, or does not fit the size constraints
 *      true    fits in a pointer-sized register
 */

//bool type_jparam2(type* t, tym_t ty);

private bool type_jparam2(type* t, tym_t ty)
{
    ty = tybasic(ty);

    if (tyfloating(ty))
        return false;
    else if (ty == TYstruct || ty == TYarray)
    {
        type_debug(t);
        targ_size_t sz = type_size(t);
        return (sz <= _tysize[TYnptr]) &&
               (config.exe == EX_WIN64 || sz == 1 || sz == 2 || sz == 4 || sz == 8);
    }
    else if (tysize(ty) <= _tysize[TYnptr])
        return true;
    return false;
}

/*****************************************
 * Allocate parameter of type t and ty to registers *preg1 and *preg2.
 * Params:
 *      fpr = register allocation state, updated
 *      t = type, valid only if ty is TYstruct or TYarray
 *      ty = type of the parameter
 *      preg1 = set to first allocated register, or NOREG
 *      preg2 = set to second allocated register, or NOREG
 * Returns:
 *      0       not allocated to any register
 *      1       *preg1, *preg2 set to allocated register pair
 */
int FuncParamRegs_alloc(ref FuncParamRegs fpr, type* t, tym_t ty, reg_t* preg1, reg_t* preg2)
{
    //printf("FuncParamRegs::alloc(ty: TY%sm t: %p)\n", tystring[tybasic(ty)], t);
    //if (t) type_print(t);

    *preg1 = NOREG;
    *preg2 = NOREG;

    type* t2 = null;
    tym_t ty2 = TYMAX;          // TYMAX means there is no second part

    // SROA with mixed registers
    if (ty & mTYxmmgpr)
    {
        ty = TYdouble;
        ty2 = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        ty = TYllong;
        ty2 = TYdouble;
    }

    // Treat array of 1 the same as its element type
    // (Don't put volatile parameters in registers)
    if (tybasic(ty) == TYarray && tybasic(t.Tty) == TYarray && t.Tdim == 1 && !(t.Tty & mTYvolatile)
        && type_size(t.Tnext) > 1)
    {
        t = t.Tnext;
        ty = t.Tty;
    }

    if (tybasic(ty) == TYstruct && type_zeroSize(t, fpr.tyf))
        return 0;               // don't allocate into registers

    ++fpr.i;

    // If struct or array
    if (tyaggregate(ty))
    {
        assert(t);
        if (config.exe == EX_WIN64)
        {
            /* Structs occupy a general purpose register, regardless of the struct
             * size or the number & types of its fields.
             */
            t = null;
            ty = TYnptr;
        }
        else
        {
            type* targ1, targ2;
            if (tybasic(t.Tty) == TYstruct)
            {
                targ1 = t.Ttag.Sstruct.Sarg1type;
                targ2 = t.Ttag.Sstruct.Sarg2type;
            }
            else if (tybasic(t.Tty) == TYarray)
            {
                if (I64)
                    argtypes(t, targ1, targ2);
            }
            else
                assert(0);

            if (targ1)
            {
                t = targ1;
                ty = t.Tty;
                if (targ2)
                {
                    t2 = targ2;
                    ty2 = t2.Tty;
                }
            }
            else if (I64 && !targ2)
                return 0;
        }
    }

    reg_t* preg = preg1;
    int regcntsave = fpr.regcnt;        // saved so a failed pair can be unwound
    int xmmcntsave = fpr.xmmcnt;

    if (config.exe == EX_WIN64)
    {
        if (tybasic(ty) == TYcfloat)
        {
            ty = TYnptr;                // treat like a struct
        }
    }
    else if (I64)
    {
        if ((tybasic(ty) == TYcent || tybasic(ty) == TYucent) &&
            fpr.numintegerregs - fpr.regcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.argregs[fpr.regcnt];
            *preg2 = fpr.argregs[fpr.regcnt + 1];
            fpr.regcnt += 2;
            return 1;
        }

        if (tybasic(ty) == TYcdouble &&
            fpr.numfloatregs - fpr.xmmcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.floatregs[fpr.xmmcnt];
            *preg2 = fpr.floatregs[fpr.xmmcnt + 1];
            fpr.xmmcnt += 2;
            return 1;
        }

        if (tybasic(ty) == TYcfloat
            && fpr.numfloatregs - fpr.xmmcnt >= 1)
        {
            // Allocate XMM register
            *preg1 = fpr.floatregs[fpr.xmmcnt++];
            return 1;
        }
    }

    // First pass allocates *preg1 for (t, ty); second pass, if there is a
    // second part, allocates *preg2 for (t2, ty2)
    foreach (j; 0 .. 2)
    {
        if (fpr.regcnt < fpr.numintegerregs)
        {
            if ((I64 || (fpr.i == 1 && (fpr.tyf == TYjfunc || fpr.tyf == TYmfunc))) &&
                type_jparam2(t, ty))
            {
                *preg = fpr.argregs[fpr.regcnt];
                ++fpr.regcnt;
                if (config.exe == EX_WIN64)
                    ++fpr.xmmcnt;
                goto Lnext;
            }
        }
        if (fpr.xmmcnt < fpr.numfloatregs)
        {
            if (tyxmmreg(ty))
            {
                *preg = fpr.floatregs[fpr.xmmcnt];
                if (config.exe == EX_WIN64)
                    ++fpr.regcnt;
                ++fpr.xmmcnt;
                goto Lnext;
            }
        }
        // Failed to allocate to a register
        if (j == 1)
        {   /* Unwind first preg1 assignment, because it's both or nothing
             */
            *preg1 = NOREG;
            fpr.regcnt = regcntsave;
            fpr.xmmcnt = xmmcntsave;
        }
        return 0;

     Lnext:
        if (tybasic(ty2) == TYMAX)
            break;
        preg = preg2;
        t = t2;
        ty = ty2;
    }
    return 1;
}

/***************************************
 * Finds replacement types for register passing of aggregates.
 * Params:
 *      t = type of the aggregate; nothing is done if null or not an aggregate
 *      arg1type = set to the type for the first register, or null
 *      arg2type = set to the type for the second register, or null
 */
void argtypes(type* t, ref type* arg1type, ref type* arg2type)
{
    if (!t) return;

    tym_t ty = t.Tty;

    if (!tyaggregate(ty))
        return;

    arg1type = arg2type = null;

    if (tybasic(ty) == TYarray)
    {
        size_t sz = cast(size_t) type_size(t);
        if (sz == 0)
            return;

        if ((I32 || config.exe == EX_WIN64) && (sz & (sz - 1)))  // size is not a power of 2
            return;

        if (config.exe == EX_WIN64 && sz > REGSIZE)
            return;

        if (sz <= 2 * REGSIZE)
        {
            type** argtype = &arg1type;
            size_t argsz = sz < REGSIZE ? sz : REGSIZE;
            foreach (v; 0 .. (sz > REGSIZE) + 1)
            {
                *argtype = argsz == 1 ? tstypes[TYchar]
                         : argsz == 2 ? tstypes[TYshort]
                         : argsz <= 4 ?
tstypes[TYlong]
                         : tstypes[TYllong];
                argtype = &arg2type;
                argsz = sz - REGSIZE;
            }
        }

        if (I64 && config.exe != EX_WIN64)
        {
            type* tn = t.Tnext;
            tym_t tyn = tn.Tty;
            while (tyn == TYarray)
            {
                tn = tn.Tnext;
                assert(tn);
                tyn = tybasic(tn.Tty);
            }

            if (tybasic(tyn) == TYstruct)
            {
                if (type_size(tn) == sz) // array(s) of size 1
                {
                    arg1type = tn.Ttag.Sstruct.Sarg1type;
                    arg2type = tn.Ttag.Sstruct.Sarg2type;
                    return;
                }

                type* t1 = tn.Ttag.Sstruct.Sarg1type;
                if (t1)
                {
                    tn = t1;
                    tyn = tn.Tty;
                }
            }

            if (sz == tysize(tyn))
            {
                if (tysimd(tyn))
                {
                    type* ts = type_fake(tybasic(tyn));
                    ts.Tcount = 1;
                    arg1type = ts;
                    return;
                }
                else if (tybasic(tyn) == TYldouble || tybasic(tyn) == TYildouble)
                {
                    arg1type = tstypes[tybasic(tyn)];
                    return;
                }
            }

            if (sz <= 16)
            {
                if (tyfloating(tyn))
                {
                    arg1type = sz <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                    if (sz > 8)
                        arg2type = (sz - 8) <= 4 ? tstypes[TYfloat] : tstypes[TYdouble];
                }
            }
        }
    }
    else if (tybasic(ty) == TYstruct)
    {
        // TODO: Move code from `cgelem.d:elstruct()` here
    }
}

/*******************************
 * Generate code sequence for function call.
 * Decides which parameters go in registers and which on the stack,
 * evaluates them, then hands off to funccall() to emit the call itself.
 * Params:
 *      cdb = code sink
 *      e = the call; e.EV.E1 is the function, e.EV.E2 (if binary) the parameters
 *      pretregs = where the return value goes, passed through to funccall()
 */

void cdfunc(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    //printf("cdfunc()\n"); elem_print(e);
    assert(e);
    uint numpara = 0;               // bytes of parameters
    uint numalign = 0;              // bytes to align stack before pushing parameters
    uint stackpushsave = stackpush; // so we can compute # of parameters
    cgstate.stackclean++;
    regm_t keepmsk = 0;
    int xmmcnt = 0;
    tym_t tyf = tybasic(e.EV.E1.Ety);        // the function type

    // Easier to deal with parameters as an array: parameters[0..np]
    int np = OTbinary(e.Eoper) ? el_nparams(e.EV.E2) : 0;
    Parameter *parameters = cast(Parameter *)alloca(np * Parameter.sizeof);

    if (np)
    {
        int n = 0;
        fillParameters(e.EV.E2, parameters, &n);
        assert(n == np);
    }

    Symbol *sf = null;                       // symbol of the function being called
    if (e.EV.E1.Eoper == OPvar)
        sf = e.EV.E1.EV.Vsym;

    /* Assume called function access statics
     */
    if (config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_FREEBSD | EX_FREEBSD64) &&
        config.flags3 & CFG3pic)
        cgstate.accessedTLS = true;

    /* Special handling for call to __tls_get_addr, we must save registers
     * before evaluating the parameter, so that the parameter load and call
     * are adjacent.
     */
    if (np == 1 && sf)
    {
        if (sf == tls_get_addr_sym)
            getregs(cdb, ~sf.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
    }

    uint stackalign = REGSIZE;
    if (tyf == TYf16func)
        stackalign = 2;
    // Figure out which parameters go in registers.
    // Compute numpara, the total bytes pushed on the stack
    FuncParamRegs fpr = FuncParamRegs_create(tyf);
    for (int i = np; --i >= 0;)
    {
        elem *ep = parameters[i].e;
        uint psize = cast(uint)_align(stackalign, paramsize(ep, tyf));     // align on stack boundary
        if (config.exe == EX_WIN64)
        {
            //printf("[%d] size = %u, numpara = %d ep = %p ", i, psize, numpara, ep); WRTYxx(ep.Ety); printf("\n");
            debug
            if (psize > REGSIZE) elem_print(e);

            assert(psize <= REGSIZE);
            psize = REGSIZE;
        }
        //printf("[%d] size = %u, numpara = %d ", i, psize, numpara); WRTYxx(ep.Ety); printf("\n");
        if (FuncParamRegs_alloc(fpr, ep.ET, ep.Ety, &parameters[i].reg, &parameters[i].reg2))
        {
            if (config.exe == EX_WIN64)
                numpara += REGSIZE;             // allocate stack space for it anyway
            continue;   // goes in register, not stack
        }

        // Parameter i goes on the stack
        parameters[i].reg = NOREG;
        uint alignsize = el_alignsize(ep);
        parameters[i].numalign = 0;
        if (alignsize > stackalign &&
            (I64 || (alignsize >= 16 &&
                (config.exe & (EX_OSX | EX_LINUX) && (tyaggregate(ep.Ety) || tyvector(ep.Ety))))))
        {
            if (alignsize > STACKALIGN)
            {
                STACKALIGN = alignsize;
                enforcealign = true;
            }
            uint newnumpara = (numpara + (alignsize - 1)) & ~(alignsize - 1);
            parameters[i].numalign = newnumpara - numpara;
            numpara = newnumpara;
            assert(config.exe != EX_WIN64);
        }
        numpara += psize;
    }

    if (config.exe == EX_WIN64)
    {
        if (numpara < 4 * REGSIZE)
            numpara = 4 * REGSIZE;
    }

    //printf("numpara = %d, stackpush = %d\n", numpara, stackpush);
    assert((numpara & (REGSIZE - 1)) == 0);
    assert((stackpush & (REGSIZE - 1)) == 0);

    /* Should consider reordering the order of evaluation of the parameters
     * so that args that go into registers are evaluated after args that get
     * pushed. We can reorder args that are constants or relconst's.
     */

    /* Determine if we should use cgstate.funcarg for the parameters or push them
     */
    bool usefuncarg = false;
    static if (0)
    {
    printf("test1 %d %d %d %d %d %d %d %d\n", (config.flags4 & CFG4speed)!=0, !Alloca.size,
        !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)),
        cast(int)numpara, !stackpush,
        (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos),
        (!typfunc(tyf) || sf && sf.Sflags & SFLexit), !I16);
    }
    if (config.flags4 & CFG4speed &&
        !Alloca.size &&
        /* The cleanup code calls a local function, leaving the return address on
         * the top of the stack. If parameters are placed there, the return address
         * is stepped on.
         * A better solution is turn this off only inside the cleanup code.
         */
        !usednteh &&
        !calledFinally &&
        (numpara || config.exe == EX_WIN64) &&
        stackpush == 0 &&               // cgstate.funcarg needs to be at top of stack
        (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos) &&
        (!(typfunc(tyf) || tyf == TYhfunc) || sf && sf.Sflags & SFLexit) &&
        !anyiasm && !I16
       )
    {
        // Stack parameters of these kinds rule out using cgstate.funcarg
        for (int i = 0; i < np; i++)
        {
            elem* ep = parameters[i].e;
            int preg = parameters[i].reg;
            //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
            if (preg == NOREG)
            {
                switch (ep.Eoper)
                {
                    case OPstrctor:
                    case OPstrthis:
                    case OPstrpar:
                    case OPnp_fp:
                        goto Lno;

                    default:
                        break;
                }
            }
        }

        if (numpara > cgstate.funcarg.size)
        {   // New high water mark
            //printf("increasing size from %d to %d\n", (int)cgstate.funcarg.size, (int)numpara);
            cgstate.funcarg.size = numpara;
        }
        usefuncarg = true;
    }
  Lno:

    /* Adjust start of the stack so after all args are pushed,
     * the stack will be aligned.
     */
    if (!usefuncarg && STACKALIGN >= 16 && (numpara + stackpush) & (STACKALIGN - 1))
    {
        numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1));
        cod3_stackadj(cdb, numalign);
        cdb.genadjesp(numalign);
        stackpush += numalign;
        stackpushsave += numalign;
    }
    assert(stackpush == stackpushsave);
    if (config.exe == EX_WIN64)
    {
        //printf("np = %d, numpara = %d, stackpush = %d\n", np, numpara, stackpush);
        assert(numpara == ((np < 4) ? 4 * REGSIZE : np * REGSIZE));

        // Allocate stack space for four entries anyway
        // http://msdn.microsoft.com/en-US/library/ew5tede7(v=vs.80)
    }

    int[XMM7 + 1] regsaved = void;
    memset(regsaved.ptr, -1, regsaved.sizeof);
    CodeBuilder cdbrestore;
    cdbrestore.ctor();
    regm_t saved = 0;
    targ_size_t funcargtossave = cgstate.funcargtos;
    targ_size_t funcargtos = numpara;
    //printf("funcargtos1 = %d\n", cast(int)funcargtos);

    /* Parameters go into the registers RDI,RSI,RDX,RCX,R8,R9
     * float and double parameters go into XMM0..XMM7
     * For variadic functions, count of XMM registers used goes in AL
     */
    for (int i = 0; i < np; i++)
    {
        elem* ep = parameters[i].e;
        int preg = parameters[i].reg;
        //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
        if (preg == NOREG)
        {
            /* Push parameter on stack, but keep track of registers used
             * in the process. If they interfere with keepmsk, we'll have
             * to save/restore them.
             */
            CodeBuilder cdbsave;
            cdbsave.ctor();
            regm_t overlap = msavereg & keepmsk;
            msavereg |= keepmsk;
            CodeBuilder cdbparams;
            cdbparams.ctor();
            if (usefuncarg)
                movParams(cdbparams, ep, stackalign, cast(uint)funcargtos, tyf);
            else
                pushParams(cdbparams,ep,stackalign, tyf);
            regm_t tosave = keepmsk & ~msavereg;
            msavereg &= ~keepmsk | overlap;

            // tosave is the mask to save and restore
            for (reg_t j = 0; tosave; j++)
            {
                regm_t mi = mask(j);
                assert(j <= XMM7);
                if (mi & tosave)
                {
                    uint idx;
                    regsave.save(cdbsave, j, &idx);
                    regsave.restore(cdbrestore, j, idx);
                    saved |= mi;
                    keepmsk &= ~mi;             // don't need to keep these for rest of params
                    tosave &= ~mi;
                }
            }

            cdb.append(cdbsave);
            cdb.append(cdbparams);

            // Alignment for parameter comes after it got pushed
            const uint numalignx = parameters[i].numalign;
            if (usefuncarg)
            {
                funcargtos -= _align(stackalign, paramsize(ep, tyf)) + numalignx;
                cgstate.funcargtos = funcargtos;
            }
            else if (numalignx)
            {
                cod3_stackadj(cdb, numalignx);
                cdb.genadjesp(numalignx);
                stackpush += numalignx;
            }
        }
        else
        {
            // Goes in register preg, not stack
            regm_t retregs = mask(preg);
            if (retregs & XMMREGS)
                ++xmmcnt;
            int preg2 = parameters[i].reg2;
            reg_t mreg,lreg;
            if (preg2 != NOREG || tybasic(ep.Ety) == TYcfloat)
            {
                assert(ep.Eoper != OPstrthis);
                if (mask(preg2) & XMMREGS)
                    ++xmmcnt;
                if (tybasic(ep.Ety) == TYcfloat)
                {
                    lreg = ST01;
                    mreg = NOREG;
                }
                else if (tyrelax(ep.Ety) == TYcent)
                {
                    lreg = mask(preg ) & mLSW ? cast(reg_t)preg  : AX;
                    mreg = mask(preg2) & mMSW ? cast(reg_t)preg2 : DX;
                }
                else
                {
                    lreg = XMM0;
                    mreg = XMM1;
                }
                retregs = (mask(mreg) | mask(lreg)) & ~mask(NOREG);
                CodeBuilder cdbsave;
                cdbsave.ctor();
                if (keepmsk & retregs)
                {
                    regm_t tosave = keepmsk & retregs;

                    // tosave is the mask to save and restore
                    for (reg_t j = 0; tosave; j++)
                    {
                        regm_t mi = mask(j);
                        assert(j <= XMM7);
                        if (mi & tosave)
                        {
                            uint idx;
                            regsave.save(cdbsave, j, &idx);
                            regsave.restore(cdbrestore, j, idx);
                            saved |= mi;
                            keepmsk &= ~mi;             // don't need to keep these for rest of params
                            tosave &= ~mi;
                        }
                    }
                }
                cdb.append(cdbsave);

                scodelem(cdb, ep, &retregs, keepmsk, false);

                // Move result [mreg,lreg] into parameter registers from [preg2,preg]
                retregs = 0;
                if (preg != lreg)
                    retregs |= mask(preg);
                if (preg2 != mreg)
                    retregs |= mask(preg2);
                retregs &= ~mask(NOREG);
                getregs(cdb,retregs);

                tym_t ty1 = tybasic(ep.Ety);
                tym_t ty2 = ty1;
                if (ep.Ety & mTYgprxmm)
                {
                    ty1 = TYllong;
                    ty2 = TYdouble;
                }
                else if (ep.Ety & mTYxmmgpr)
                {
                    ty1 = TYdouble;
                    ty2 = TYllong;
                }
                else if (ty1 == TYstruct)
                {
                    type* targ1 = ep.ET.Ttag.Sstruct.Sarg1type;
                    type* targ2 = ep.ET.Ttag.Sstruct.Sarg2type;
                    if (targ1)
                        ty1 = targ1.Tty;
                    if (targ2)
                        ty2 = targ2.Tty;
                }
                else if (tyrelax(ty1) == TYcent)
                    ty1 = ty2 = TYllong;
                else if (tybasic(ty1) == TYcdouble)
                    ty1 = ty2 = TYdouble;

                if (tybasic(ep.Ety) == TYcfloat)
                {
                    assert(I64);
                    assert(lreg == ST01 && mreg == NOREG);
                    // spill
                    pop87();
                    pop87();
                    cdb.genfltreg(0xD9, 3, tysize(TYfloat));
                    genfwait(cdb);
                    cdb.genfltreg(0xD9, 3, 0);
                    genfwait(cdb);
                    // reload
                    if (config.exe == EX_WIN64)
                    {
                        cdb.genfltreg(LOD, preg, 0);
                        code_orrex(cdb.last(), REX_W);
                    }
                    else
                    {
                        assert(mask(preg) & XMMREGS);
                        cdb.genxmmreg(xmmload(TYdouble), cast(reg_t) preg, 0, TYdouble);
                    }
                }
                else foreach (v; 0 .. 2)
                {
                    if (v ^ (preg != mreg))
                        genmovreg(cdb, preg, lreg, ty1);
                    else
                        genmovreg(cdb, preg2, mreg, ty2);
                }

                retregs = (mask(preg) | mask(preg2)) & ~mask(NOREG);
            }
            else if (ep.Eoper == OPstrthis)
            {
                getregs(cdb,retregs);
                // LEA preg,np[RSP]
                uint delta = stackpush - ep.EV.Vuns;   // stack delta to parameter
                cdb.genc1(LEA,
                        (modregrm(0,4,SP) << 8) | modregxrm(2,preg,4), FLconst,delta);
                if (I64)
                    code_orrex(cdb.last(), REX_W);
            }
            else if (ep.Eoper == OPstrpar && config.exe == EX_WIN64 && type_size(ep.ET) == 0)
            {
                retregs = 0;
                scodelem(cdb, ep.EV.E1, &retregs, keepmsk, false);
                freenode(ep);
            }
            else
            {
                scodelem(cdb, ep, &retregs, keepmsk, false);
            }
            keepmsk |= retregs;      // don't change preg when evaluating func address
        }
    }

    if (config.exe == EX_WIN64)
    {   // Allocate stack space for four entries anyway
        // http://msdn.microsoft.com/en-US/library/ew5tede7(v=vs.80)
        {   uint sz = 4 * REGSIZE;
            if (usefuncarg)
            {
                funcargtos -= sz;
                cgstate.funcargtos = funcargtos;
            }
            else
            {
                cod3_stackadj(cdb, sz);
                cdb.genadjesp(sz);
                stackpush += sz;
            }
        }

        /* Variadic functions store XMM parameters into their corresponding GP registers
         */
        for (int i = 0; i < np; i++)
        {
            int preg = parameters[i].reg;
            regm_t retregs = mask(preg);
            if (retregs & XMMREGS)
            {
                reg_t reg;
                switch (preg)
                {
                    case XMM0: reg = CX; break;
                    case XMM1: reg = DX; break;
                    case XMM2: reg = R8; break;
                    case XMM3: reg = R9; break;

                    default:   assert(0);
                }
                getregs(cdb,mask(reg));
                cdb.gen2(STOD,(REX_W << 16) | modregxrmx(3,preg-XMM0,reg)); // MOVD reg,preg
            }
        }
    }

    // Restore any register parameters we saved
    getregs(cdb,saved);
    cdb.append(cdbrestore);
    keepmsk |= saved;

    // Variadic functions store the number of XMM registers used in AL
    if (I64 && config.exe != EX_WIN64 && e.Eflags & EFLAGS_variadic)
    {
        getregs(cdb,mAX);
        movregconst(cdb,AX,xmmcnt,1);
        keepmsk |= mAX;
    }

    //printf("funcargtos2 = %d\n", (int)funcargtos);
    assert(!usefuncarg || (funcargtos == 0 && cgstate.funcargtos == 0));
    cgstate.stackclean--;

    debug
    if (!usefuncarg && numpara != stackpush - stackpushsave)
    {
        printf("function %s\n", funcsym_p.Sident.ptr);
        printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave);
        elem_print(e);
    }

    assert(usefuncarg || numpara == stackpush - stackpushsave);

    funccall(cdb,e,numpara,numalign,pretregs,keepmsk,usefuncarg);
    cgstate.funcargtos = funcargtossave;
}

/***********************************
 * Generate code for an OPstrthis elem: load into a register the address
 * of a stack location `stackpush - e.EV.Vuns` bytes above the stack pointer.
 * Params:
 *      cdb = code sink
 *      e = the OPstrthis elem; must be REGSIZE in size
 *      pretregs = where the result goes
 */

void cdstrthis(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    assert(tysize(e.Ety) == REGSIZE);
    const reg = findreg(*pretregs & allregs);
    getregs(cdb,mask(reg));
    // LEA reg,np[ESP]
    uint np = stackpush - e.EV.Vuns;        // stack delta to parameter
    cdb.genc1(LEA,(modregrm(0,4,SP) << 8) | modregxrm(2,reg,4),FLconst,np);
    if (I64)
        code_orrex(cdb.last(), REX_W);
    fixresult(cdb, e, mask(reg), pretregs);
}

/******************************
 * Call function. All parameters have already been pushed onto the stack.
* Emits the CALL instruction (direct, through a register, or through memory),
 * then the caller-side stack cleanup, and finally moves the return value to
 * where `*pretregs` asks for it.
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = function call
 *      numpara = size in bytes of all the parameters
 *      numalign = amount the stack was aligned by before the parameters were pushed
 *      pretregs = where return value goes
 *      keepmsk = registers to not change when evaluating the function address
 *      usefuncarg = using cgstate.funcarg, so no need to adjust stack after func return
 */

private void funccall(ref CodeBuilder cdb, elem* e, uint numpara, uint numalign,
        regm_t* pretregs,regm_t keepmsk, bool usefuncarg)
{
    //printf("%s ", funcsym_p.Sident.ptr);
    //printf("funccall(e = %p, *pretregs = %s, numpara = %d, numalign = %d, usefuncarg=%d)\n",e,regm_str(*pretregs),numpara,numalign,usefuncarg);
    calledafunc = 1;
    // Determine if we need frame for function prolog/epilog

    if (config.memmodel == Vmodel)
    {
        if (tyfarfunc(funcsym_p.ty()))
            needframe = true;
    }

    code cs;
    regm_t retregs;
    Symbol* s;              // called symbol for a direct call; set to null for a call via pointer

    elem* e1 = e.EV.E1;     // the function address expression
    tym_t tym1 = tybasic(e1.Ety);
    char farfunc = tyfarfunc(tym1) || tym1 == TYifunc;

    // Code for evaluating the function address and the CALL itself is
    // collected separately in cdbe and appended to cdb further down.
    CodeBuilder cdbe;
    cdbe.ctor();

    if (e1.Eoper == OPvar)
    {   // Call function directly

        if (!tyfunc(tym1))
            WRTYxx(tym1);
        assert(tyfunc(tym1));
        s = e1.EV.Vsym;
        if (s.Sflags & SFLexit)
        { }
        else if (s != tls_get_addr_sym)
            save87(cdb);               // assume 8087 regs are all trashed

        // Function calls may throw Errors, unless marked that they don't
        if (s == funcsym_p || !s.Sfunc || !(s.Sfunc.Fflags3 & Fnothrow))
            funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;

        if (s.Sflags & SFLexit)
        {
            // Function doesn't return, so don't worry about registers
            // it may use
        }
        else if (!tyfunc(s.ty()) || !(config.flags4 & CFG4optimized))
            // so we can replace func at runtime
            getregs(cdbe,~fregsaved & (mBP | ALLREGS | mES | XMMREGS));
        else
            getregs(cdbe,~s.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
        // A call to a symbol literally named "alloca" is redirected to the
        // RTLSYM_ALLOCA runtime symbol, with an extra address passed in
        // CX (DX on Win64).
        if (strcmp(s.Sident.ptr, "alloca") == 0)
        {
            s = getRtlsym(RTLSYM_ALLOCA);
            makeitextern(s);
            int areg = CX;
            if (config.exe == EX_WIN64)
                areg = DX;
            getregs(cdbe, mask(areg));
            cdbe.genc(LEA, modregrm(2, areg, BPRM), FLallocatmp, 0, 0, 0);  // LEA areg,&localsize[BP]
            if (I64)
                code_orrex(cdbe.last(), REX_W);
            Alloca.size = REGSIZE;
        }
        if (sytab[s.Sclass] & SCSS)    // if function is on stack (!)
        {
            // Load the address into registers and call through them
            retregs = allregs & ~keepmsk;
            s.Sflags &= ~GTregcand;
            s.Sflags |= SFLread;
            cdrelconst(cdbe,e1,&retregs);
            if (farfunc)
            {
                // Far call: spill segment:offset to the floatreg temporary
                // and call indirectly through that memory location
                const reg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                floatreg = true;         // use float register
                reflocal = true;
                cdbe.genc1(0x89,         // MOV floatreg+2,reg
                        modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
                cdbe.genc1(0x89,         // MOV floatreg,lsreg
                        modregrm(2, lsreg, BPRM), FLfltreg, 0);
                if (tym1 == TYifunc)
                    cdbe.gen1(0x9C);     // PUSHF
                cdbe.genc1(0xFF,         // CALL [floatreg]
                        modregrm(2, 3, BPRM), FLfltreg, 0);
            }
            else
            {
                const reg = findreg(retregs);
                cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
                if (I64)
                    code_orrex(cdbe.last(), REX_W);
            }
        }
        else
        {
            int fl = FLfunc;
            if (!tyfunc(s.ty()))
                fl = el_fl(e1);
            if (tym1 == TYifunc)
                cdbe.gen1(0x9C);                             // PUSHF
            if (config.exe & (EX_windos | EX_OSX | EX_OSX64))
            {
                cdbe.gencs(farfunc ? 0x9A : 0xE8,0,fl,s);    // CALL extern
            }
            else
            {
                assert(!farfunc);
                if (s != tls_get_addr_sym)
                {
                    //printf("call %s\n", s.Sident.ptr);
                    load_localgot(cdb);
                    cdbe.gencs(0xE8, 0, fl, s);    // CALL extern
                }
                else if (I64)
                {
                    /* Prepend 66 66 48 so GNU linker has patch room
                     */
                    assert(!farfunc);
                    cdbe.gen1(0x66);
                    cdbe.gen1(0x66);
                    cdbe.gencs(0xE8, 0, fl, s);      // CALL extern
                    cdbe.last().Irex = REX | REX_W;
                }
                else
                    cdbe.gencs(0xE8, 0, fl, s);    // CALL extern
            }
            code_orflag(cdbe.last(), farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff));
        }
    }
    else
    {   // Call function via pointer

        // Function calls may throw Errors
        funcsym_p.Sfunc.Fflags3 &= ~Fnothrow;

        // Print diagnostics before the assert below fires
        if (e1.Eoper != OPind) { WRFL(cast(FL)el_fl(e1)); WROP(e1.Eoper); }
        save87(cdb);                   // assume 8087 regs are all trashed
        assert(e1.Eoper == OPind);
        elem *e11 = e1.EV.E1;          // the pointer being called through
        tym_t e11ty = tybasic(e11.Ety);
        assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr)));
        load_localgot(cdb);
        if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) // 32 bit only
        {
            if (config.flags3 & CFG3pic)
                keepmsk |= mBX;
        }

        /* Mask of registers destroyed by the function call
         */
        regm_t desmsk = (mBP | ALLREGS | mES | XMMREGS) & ~fregsaved;

        // if we can't use loadea()
        if ((!OTleaf(e11.Eoper) || e11.Eoper == OPconst) &&
            (e11.Eoper != OPind || e11.Ecount))
        {
            // Evaluate the pointer into registers first
            retregs = allregs & ~keepmsk;
            cgstate.stackclean++;
            scodelem(cdbe,e11,&retregs,keepmsk,true);
            cgstate.stackclean--;
            // Kill registers destroyed by an arbitrary function call
            getregs(cdbe,desmsk);
            if (e11ty == TYfptr)
            {
                // Far pointer: spill to floatreg and call through memory
                const reg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                floatreg = true;         // use float register
                reflocal = true;
                cdbe.genc1(0x89,         // MOV floatreg+2,reg
                        modregrm(2, reg, BPRM), FLfltreg, REGSIZE);
                cdbe.genc1(0x89,         // MOV floatreg,lsreg
                        modregrm(2, lsreg, BPRM), FLfltreg, 0);
                if (tym1 == TYifunc)
                    cdbe.gen1(0x9C);     // PUSHF
                cdbe.genc1(0xFF,         // CALL [floatreg]
                        modregrm(2, 3, BPRM), FLfltreg, 0);
            }
            else
            {
                const reg = findreg(retregs);
                cdbe.gen2(0xFF, modregrmx(3, 2, reg));   // CALL reg
                if (I64)
                    code_orrex(cdbe.last(), REX_W);
            }
        }
        else
        {
            // NOTE(review): this PUSHF goes to cdb, not cdbe like the other
            // PUSHF sites; since cdbe is appended to cdb below it still
            // precedes the CALL.
            if (tym1 == TYifunc)
                cdb.gen1(0x9C);                 // PUSHF
                                                // CALL [function]
            cs.Iflags = 0;
            cgstate.stackclean++;
            loadea(cdbe, e11, &cs, 0xFF, farfunc ? 3 : 2, 0, keepmsk, desmsk);
            cgstate.stackclean--;
            freenode(e11);
        }
        s = null;       // no symbol: the SFLexit test below is skipped
    }
    cdb.append(cdbe);
    freenode(e1);

    /* See if we will need the frame pointer.
       Calculate it here so we can possibly use BP to fix the stack.
     */
static if (0)           // disabled: this block is never compiled
{
    if (!needframe)
    {
        // If there is a register available for this basic block
        if (config.flags4 & CFG4optimized && (ALLREGS & ~regcon.used))
        { }
        else
        {
            for (SYMIDX si = 0; si < globsym.length; si++)
            {
                Symbol* s = globsym[si];

                if (s.Sflags & GTregcand && type_size(s.Stype) != 0)
                {
                    if (config.flags4 & CFG4optimized)
                    {   // If symbol is live in this basic block and
                        // isn't already in a register
                        if (s.Srange && vec_testbit(dfoidx, s.Srange) &&
                            s.Sfl != FLreg)
                        {   // Then symbol must be allocated on stack
                            needframe = true;
                            break;
                        }
                    }
                    else
                    {   if (mfuncreg == 0)      // if no registers left
                        {   needframe = true;
                            break;
                        }
                    }
                }
            }
        }
    }
}

    // Registers in which the callee hands back its result
    reg_t reg1, reg2;
    retregs = allocretregs(e.Ety, e.ET, tym1, reg1, reg2);

    assert(retregs || !*pretregs);

    if (!usefuncarg)
    {
        // If stack needs cleanup
        if (s && s.Sflags & SFLexit)
        {
            if (config.fulltypes && TARGET_WINDOS)
            {
                // the stack walker evaluates the return address, not a byte of the
                // call instruction, so ensure there is an instruction byte after
                // the call that still has the same line number information
                cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3);
            }
            /* Function never returns, so don't need to generate stack
             * cleanup code. But still need to log the stack cleanup
             * as if it did return.
             */
            cdb.genadjesp(-(numpara + numalign));
            stackpush -= numpara + numalign;
        }
        else if ((OTbinary(e.Eoper) || config.exe == EX_WIN64) &&
            (!typfunc(tym1) || config.exe == EX_WIN64))
        {
            // Caller removes the parameters
            if (tym1 == TYhfunc)
            {   // Hidden parameter is popped off by the callee
                cdb.genadjesp(-REGSIZE);
                stackpush -= REGSIZE;
                if (numpara + numalign > REGSIZE)
                    genstackclean(cdb, numpara + numalign - REGSIZE, retregs);
            }
            else
                genstackclean(cdb, numpara + numalign, retregs);
        }
        else
        {
            cdb.genadjesp(-numpara);  // popped off by the callee's 'RET numpara'
            stackpush -= numpara;
            if (numalign)              // callee doesn't know about alignment adjustment
                genstackclean(cdb,numalign,retregs);
        }
    }

    /* Special handling for functions which return a floating point
       value in the top of the 8087 stack.
     */

    if (retregs & mST0)
    {
        cdb.genadjfpu(1);
        if (*pretregs)                  // if we want the result
        {
            //assert(global87.stackused == 0);
            push87(cdb);                // one item on 8087 stack
            fixresult87(cdb,e,retregs,pretregs);
            return;
        }
        else
            // Pop unused result off 8087 stack
            cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
    }
    else if (retregs & mST01)
    {
        cdb.genadjfpu(2);
        if (*pretregs)                  // if we want the result
        {
            assert(global87.stackused == 0);
            push87(cdb);
            push87(cdb);                // two items on 8087 stack
            fixresult_complex87(cdb, e, retregs, pretregs, true);
            return;
        }
        else
        {
            // Pop unused result off 8087 stack
            cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
            cdb.gen2(0xDD, modregrm(3, 3, 0));           // FPOP
        }
    }

    /* Special handling for functions that return one part
       in XMM0 and the other part in AX
     */
    if (*pretregs && retregs)
    {
        if (reg1 == NOREG || reg2 == NOREG)
        {}
        // exactly one of reg1/reg2 is an XMM register
        else if ((0 == (mask(reg1) & XMMREGS)) ^ (0 == (mask(reg2) & XMMREGS)))
        {
            // Move both halves into one register class: lreg gets reg1, mreg gets reg2
            reg_t lreg, mreg;
            if (mask(reg1) & XMMREGS)
            {
                lreg = XMM0;
                mreg = XMM1;
            }
            else
            {
                lreg = mask(reg1) & mLSW ? reg1 : AX;
                mreg = mask(reg2) & mMSW ? reg2 : DX;
            }
            // Two passes, one move each; the order is chosen by whether
            // reg2 already is lreg.
            for (int v = 0; v < 2; v++)
            {
                if (v ^ (reg2 != lreg))
                    genmovreg(cdb,lreg,reg1);
                else
                    genmovreg(cdb,mreg,reg2);
            }
            retregs = mask(lreg) | mask(mreg);
        }
    }

    /* Special handling for functions which return complex float in XMM0 or RAX. */

    if (I64
        && config.exe != EX_WIN64 // broken
        && *pretregs && tybasic(e.Ety) == TYcfloat)
    {
        assert(reg2 == NOREG);
        // spill
        // NOTE(review): the EX_WIN64 branch below cannot be taken while the
        // enclosing condition excludes EX_WIN64.
        if (config.exe == EX_WIN64)
        {
            assert(reg1 == AX);
            cdb.genfltreg(STO, reg1, 0);
            code_orrex(cdb.last(), REX_W);
        }
        else
        {
            assert(reg1 == XMM0);
            cdb.genxmmreg(xmmstore(TYdouble), reg1, 0, TYdouble);
        }
        // reload real
        push87(cdb);
        cdb.genfltreg(0xD9, 0, 0);
        genfwait(cdb);
        // reload imaginary
        push87(cdb);
        cdb.genfltreg(0xD9, 0, tysize(TYfloat));
        genfwait(cdb);

        retregs = mST01;        // result now lives on the 8087 stack
    }

    fixresult(cdb, e, retregs, pretregs);
}

/***************************
 * Determine size of argument e that will be pushed.
 * Params:
 *      e = argument expression; must not be an OPparam node
 *      tyf = forwarded to type_parameterSize() for struct and array arguments
 *            (presumably the type of the function being called - confirm)
 * Returns:
 *      size of the argument before any stack alignment is applied.
 *      Asserts (after printing the type) if the type is neither scalar,
 *      TYstruct nor TYarray.
 */

targ_size_t paramsize(elem* e, tym_t tyf)
{
    assert(e.Eoper != OPparam);
    targ_size_t szb;
    tym_t tym = tybasic(e.Ety);
    if (tyscalar(tym))
        szb = size(tym);
    else if (tym == TYstruct || tym == TYarray)
        szb = type_parameterSize(e.ET, tyf);
    else
    {
        WRTYxx(tym);    // print the offending type before asserting
        assert(0);
    }
    return szb;
}

/***************************
 * Generate code to move argument e on the stack.
* The argument is stored with MOV/FSTP style instructions into the
 * FLfuncarg area, ending at offset funcargtos; unlike pushParams() no PUSH
 * is emitted and stackpush is not modified.
 * Not used for 16 bit code generation (asserts !I16).
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = argument expression; must not be OPparam. OPstrctor, OPstrthis,
 *          OPstrpar and OPnp_fp are not supported here and assert.
 *      stackalign = alignment the argument size is rounded up to
 *      funcargtos = offset just past the slot for this argument; the
 *                   argument occupies [funcargtos - sz, funcargtos)
 *      tyf = forwarded to paramsize()
 */

private void movParams(ref CodeBuilder cdb, elem* e, uint stackalign, uint funcargtos, tym_t tyf)
{
    //printf("movParams(e = %p, stackalign = %d, funcargtos = %d)\n", e, stackalign, funcargtos);
    //printf("movParams()\n"); elem_print(e);
    assert(!I16);
    assert(e && e.Eoper != OPparam);

    tym_t tym = tybasic(e.Ety);
    if (tyfloating(tym))
        objmod.fltused();

    int grex = I64 ? REX_W << 16 : 0;

    targ_size_t szb = paramsize(e, tyf);          // size before alignment
    targ_size_t sz = _align(stackalign, szb);     // size after alignment
    assert((sz & (stackalign - 1)) == 0);         // ensure that alignment worked
    assert((sz & (REGSIZE - 1)) == 0);
    //printf("szb = %d sz = %d\n", (int)szb, (int)sz);

    code cs;
    cs.Iflags = 0;
    cs.Irex = 0;
    switch (e.Eoper)
    {
        case OPstrctor:
        case OPstrthis:
        case OPstrpar:
        case OPnp_fp:
            assert(0);

        case OPrelconst:
        {
            int fl;
            if (!evalinregister(e) &&
                !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) &&
                ((fl = el_fl(e)) == FLdata || fl == FLudata || fl == FLextern)
               )
            {
                // Store the address as an immediate:
                // MOV funcarg slot,&variable
                cs.Iop = 0xC7;
                cs.Irm = modregrm(2,0,BPRM);
                if (I64 && sz == 8)
                    cs.Irex |= REX_W;
                cs.IFL1 = FLfuncarg;
                cs.IEV1.Voffset = funcargtos - REGSIZE;
                cs.IEV2.Voffset = e.EV.Voffset;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e.EV.Vsym;
                cs.Iflags |= CFoff;
                cdb.gen(&cs);
                return;
            }
            break;
        }

        case OPconst:
            if (!evalinregister(e))
            {
                cs.Iop = (sz == 1) ? 0xC6 : 0xC7;
                cs.Irm = modregrm(2,0,BPRM);
                cs.IFL1 = FLfuncarg;
                cs.IEV1.Voffset = funcargtos - sz;
                cs.IFL2 = FLconst;
                targ_size_t *p = cast(targ_size_t *) &(e.EV);
                cs.IEV2.Vsize_t = *p;
                if (I64 && tym == TYcldouble)
                    // The alignment of EV.Vcldouble is not the same on the compiler
                    // as on the target
                    goto Lbreak;
                if (I64 && sz >= 8)
                {
                    // Pre-scan: give up on immediates if any REGSIZE chunk
                    // cannot be expressed as a sign extended 32 bit value
                    int i = cast(int)sz;
                    do
                    {
                        if (*p >= 0x80000000)
                        {   // Use 64 bit register MOV, as the 32 bit one gets sign extended
                            // MOV reg,imm64
                            // MOV EA,reg
                            goto Lbreak;
                        }
                        p = cast(targ_size_t *)(cast(char *) p + REGSIZE);
                        i -= REGSIZE;
                    } while (i > 0);
                    p = cast(targ_size_t *) &(e.EV);
                }

                // Store the constant one REGSIZE chunk at a time
                int i = cast(int)sz;
                do
                {   int regsize = REGSIZE;
                    regm_t retregs = (sz == 1) ? BYTEREGS : allregs;
                    reg_t reg;
                    if (reghasvalue(retregs,*p,&reg))
                    {
                        // A register already holds the value: store it instead
                        cs.Iop = (cs.Iop & 1) | 0x88;
                        cs.Irm |= modregrm(0, reg & 7, 0); // MOV EA,reg
                        if (reg & 8)
                            cs.Irex |= REX_R;
                        if (I64 && sz == 1 && reg >= 4)
                            cs.Irex |= REX;
                    }
                    if (I64 && sz >= 8)
                        cs.Irex |= REX_W;
                    cdb.gen(&cs);           // MOV EA,const

                    // Reset cs to the immediate form for the next chunk
                    p = cast(targ_size_t *)(cast(char *) p + regsize);
                    cs.Iop = 0xC7;
                    cs.Irm &= cast(ubyte)~cast(int)modregrm(0, 7, 0);
                    cs.Irex &= ~REX_R;
                    cs.IEV1.Voffset += regsize;
                    cs.IEV2.Vint = cast(targ_int)*p;
                    i -= regsize;
                } while (i > 0);
                return;
            }

        Lbreak:
            break;

        default:
            break;
    }

    // General case: evaluate the argument, then store the result
    regm_t retregs = tybyte(tym) ? BYTEREGS : allregs;
    if (tyvector(tym) ||
        config.fpxmmregs && tyxmmreg(tym) &&
        // If not already in x87 register from function call return
        !((e.Eoper == OPcall || e.Eoper == OPucall) && I32))
    {
        retregs = XMMREGS;
        codelem(cdb, e, &retregs, false);
        const op = xmmstore(tym);
        const r = findreg(retregs);
        cdb.genc1(op, modregxrm(2, r - XMM0, BPRM), FLfuncarg, funcargtos - sz);   // MOV funcarg slot,r
        checkSetVex(cdb.last(),tym);
        return;
    }
    else if (tyfloating(tym))
    {
        if (config.inline8087)
        {
            retregs = tycomplex(tym) ? mST01 : mST0;
            codelem(cdb, e, &retregs, false);

            // Select the FSTP form matching the operand width
            opcode_t op;
            uint r;
            switch (tym)
            {
                case TYfloat:
                case TYifloat:
                case TYcfloat:
                    op = 0xD9;
                    r = 3;
                    break;

                case TYdouble:
                case TYidouble:
                case TYdouble_alias:
                case TYcdouble:
                    op = 0xDD;
                    r = 3;
                    break;

                case TYldouble:
                case TYildouble:
                case TYcldouble:
                    op = 0xDB;
                    r = 7;
                    break;

                default:
                    assert(0);
            }
            if (tycomplex(tym))
            {
                // FSTP into the upper half of the slot
                cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz/2);
                pop87();
            }
            pop87();
            cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz);    // FSTP into start of the slot
            return;
        }
    }
    scodelem(cdb, e, &retregs, 0, true);
    if (sz <= REGSIZE)
    {
        uint r = findreg(retregs);
        cdb.genc1(0x89, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE);   // MOV slot,r
        if (sz == 8)
            code_orrex(cdb.last(), REX_W);
    }
    else if (sz == REGSIZE * 2)
    {
        uint r = findregmsw(retregs);
        cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE);    // MOV upper half of slot,msreg
        r = findreglsw(retregs);
        cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE * 2); // MOV lower half of slot,lsreg
    }
    else
        assert(0);
}


/***************************
 * Generate code to push argument e on the stack.
 * stackpush is incremented by stackalign for each PUSH.
 * Every change of the stack pointer is also logged with cdb.genadjesp().
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = argument expression; must not be OPparam
 *      stackalign = alignment the argument size is rounded up to
 *      tyf = forwarded to paramsize() and to recursive calls
 */

void pushParams(ref CodeBuilder cdb, elem* e, uint stackalign, tym_t tyf)
{
    //printf("params(e = %p, stackalign = %d)\n", e, stackalign);
    //printf("params()\n"); elem_print(e);
    stackchanged = 1;
    assert(e && e.Eoper != OPparam);

    tym_t tym = tybasic(e.Ety);
    if (tyfloating(tym))
        objmod.fltused();

    int grex = I64 ? REX_W << 16 : 0;

    targ_size_t szb = paramsize(e, tyf);          // size before alignment
    targ_size_t sz = _align(stackalign,szb);      // size after alignment
    assert((sz & (stackalign - 1)) == 0);         // ensure that alignment worked
    assert((sz & (REGSIZE - 1)) == 0);

    /* Fast paths that can push directly from memory or from an immediate.
     * A `break` out of this switch means "evaluate e into registers and push
     * those", which is what the code after the switch does.
     */
    switch (e.Eoper)
    {
    version (SCPP)
    {
        case OPstrctor:
        {
            elem* e1 = e.EV.E1;
            docommas(cdb,&e1);              // skip over any comma expressions

            // Reserve the space for the object on the stack
            cod3_stackadj(cdb, sz);
            stackpush += sz;
            cdb.genadjesp(sz);

            // Find OPstrthis and set it to stackpush
            exp2_setstrthis(e1, null, stackpush, null);

            regm_t retregs = 0;
            codelem(cdb, e1, &retregs, true);
            freenode(e);
            return;
        }
        case OPstrthis:
            // This is the parameter for the 'this' pointer corresponding to
            // OPstrctor. We push a pointer to an object that was already
            // allocated on the stack by OPstrctor.
        {
            regm_t retregs = allregs;
            reg_t reg;
            allocreg(cdb, &retregs, &reg, TYoffset);
            genregs(cdb, 0x89, SP, reg);        // MOV reg,SP
            if (I64)
                code_orrex(cdb.last(), REX_W);
            uint np = stackpush - e.EV.Vuns;         // stack delta to parameter
            cdb.genc2(0x81, grex | modregrmx(3, 0, reg), np); // ADD reg,np
            if (sz > REGSIZE)
            {
                cdb.gen1(0x16);                     // PUSH SS
                stackpush += REGSIZE;
            }
            cdb.gen1(0x50 + (reg & 7));             // PUSH reg
            if (reg & 8)
                code_orrex(cdb.last(), REX_B);
            stackpush += REGSIZE;
            cdb.genadjesp(sz);
            freenode(e);
            return;
        }
    }

        case OPstrpar:
        {
            uint rm;

            elem* e1 = e.EV.E1;
            if (sz == 0)
            {
                // Nothing to push: evaluate e1 only for its side effects and
                // undo whatever it left on the stack
                docommas(cdb, &e1); // skip over any commas

                const stackpushsave = stackpush;
                const stackcleansave = cgstate.stackclean;
                cgstate.stackclean = 0;

                regm_t retregs = 0;
                codelem(cdb,e1,&retregs,true);

                assert(cgstate.stackclean == 0);
                cgstate.stackclean = stackcleansave;
                genstackclean(cdb,stackpush - stackpushsave,0);

                freenode(e);
                return;
            }
            if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1.Eoper == OPvar)
            {
                // Small struct held in a variable: push it like a plain OPvar
                freenode(e);
                e = e1;
                goto L1;
            }
            docommas(cdb,&e1);             // skip over any commas
            code_flags_t seg = 0;          // assume no seg override
            regm_t retregs = sz ? IDXREGS : 0;
            bool doneoff = false;
            uint pushsize = REGSIZE;
            uint op16 = 0;
            if (!I16 && sz & 2)     // if odd number of words to push
            {
                pushsize = 2;
                op16 = 1;
            }
            else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0)
            {
                pushsize = 4;       // push DWORDs at a time
                op16 = 1;
            }
            uint npushes = cast(uint)(sz / pushsize);

            // Get the address of the struct into an index register (retregs)
            // and work out which segment override, if any, is needed
            switch (e1.Eoper)
            {
                case OPind:
                    if (sz)
                    {
                        switch (tybasic(e1.EV.E1.Ety))
                        {
                            case TYfptr:
                            case TYhptr:
                                seg = CFes;
                                retregs |= mES;
                                break;

                            case TYsptr:
                                if (config.wflags & WFssneds)
                                    seg = CFss;
                                break;

                            case TYfgPtr:
                                if (I32)
                                     seg = CFgs;
                                else if (I64)
                                     seg = CFfs;
                                else
                                     assert(0);
                                break;

                            case TYcptr:
                                seg = CFcs;
                                break;

                            default:
                                break;
                        }
                    }
                    codelem(cdb, e1.EV.E1, &retregs, false);
                    freenode(e1);
                    break;

                case OPvar:
                    /* Symbol is no longer a candidate for a register */
                    e1.EV.Vsym.Sflags &= ~GTregcand;

                    if (!e1.Ecount && npushes > 4)
                    {
                        /* Kludge to point at last word in struct. */
                        /* Don't screw up CSEs.                 */
                        e1.EV.Voffset += sz - pushsize;
                        doneoff = true;
                    }
                    //if (LARGEDATA) /* if default isn't DS */
                    {
                        static immutable uint[4] segtocf = [ CFes,CFcs,CFss,0 ];

                        int fl = el_fl(e1);
                        if (fl == FLfardata)
                        {
                            seg = CFes;
                            retregs |= mES;
                        }
                        else
                        {
                            uint s = segfl[fl];
                            assert(s < 4);
                            seg = segtocf[s];
                            if (seg == CFss && !(config.wflags & WFssneds))
                                seg = 0;
                        }
                    }
                    if (e1.Ety & mTYfar)
                    {
                        seg = CFes;
                        retregs |= mES;
                    }
                    cdrelconst(cdb, e1, &retregs);
                    // Reverse the effect of the previous add
                    if (doneoff)
                        e1.EV.Voffset -= sz - pushsize;
                    freenode(e1);
                    break;

                case OPstreq:
                //case OPcond:
                    if (config.exe & EX_segmented)
                    {
                        seg = CFes;
                        retregs |= mES;
                    }
                    codelem(cdb, e1, &retregs, false);
                    break;

                case OPpair:
                case OPrpair:
                    pushParams(cdb, e1, stackalign, tyf);
                    freenode(e);
                    return;

                default:
                    elem_print(e1);
                    assert(0);
            }
            reg_t reg = findreglsw(retregs);
            rm = I16 ? regtorm[reg] : regtorm32[reg];
            if (op16)
                seg |= CFopsize;            // operand size
            if (npushes <= 4)
            {
                // Few enough pushes to emit them inline, highest offset first
                assert(!doneoff);
                for (; npushes > 1; --npushes)
                {
                    cdb.genc1(0xFF, buildModregrm(2, 6, rm), FLconst, pushsize * (npushes - 1));  // PUSH offset[reg]
                    code_orflag(cdb.last(),seg);
                    cdb.genadjesp(pushsize);
                }
                cdb.gen2(0xFF,buildModregrm(0, 6, rm));     // PUSH [reg]
                cdb.last().Iflags |= seg;
                cdb.genadjesp(pushsize);
            }
            else if (sz)
            {
                // Many pushes: emit a counted loop using CX
                getregs_imm(cdb, mCX | retregs);
                                                    // MOV CX,npushes
                movregconst(cdb, CX, npushes, 0);
                if (!doneoff)
                {   // This should be done when
                    // reg is loaded. Fix later
                                                    // ADD reg,sz-pushsize
                    cdb.genc2(0x81, grex | modregrmx(3, 0, reg), sz-pushsize);
                }
                getregs(cdb,mCX);                       // the LOOP decrements it
                cdb.gen2(0xFF, buildModregrm(0, 6, rm));        // PUSH [reg]
                cdb.last().Iflags |= seg | CFtarg2;
                code* c3 = cdb.last();                  // loop target
                cdb.genc2(0x81,grex | buildModregrm(3, 5,reg), pushsize);  // SUB reg,pushsize
                if (I16 || config.flags4 & CFG4space)
                    genjmp(cdb,0xE2,FLcode,cast(block *)c3);// LOOP c3
                else
                {
                    if (I64)
                        cdb.gen2(0xFF, modregrm(3, 1, CX));// DEC CX
                    else
                        cdb.gen1(0x48 + CX);            // DEC CX
                    genjmp(cdb, JNE, FLcode, cast(block *)c3);  // JNE c3
                }
                regimmed_set(CX,0);
                cdb.genadjesp(cast(int)sz);
            }
            stackpush += sz;
            freenode(e);
            return;
        }

        case OPind:
            if (!e.Ecount)                         /* if *e1       */
            {
                if (sz < REGSIZE)
                {
                    /* Don't push REGSIZE quantity because it may
                     * straddle past the end of valid memory
                     */
                    break;
                }
                if (sz == REGSIZE)
                    goto case OPvar;    // handle it with loadea()

                // Avoid PUSH MEM on the Pentium when optimizing for speed
                if (config.flags4 & CFG4speed &&
                    (config.target_cpu >= TARGET_80486 &&
                     config.target_cpu <= TARGET_PentiumMMX) &&
                    sz <= 2 * REGSIZE &&
                    !tyfloating(tym))
                    break;

                if (tym == TYldouble || tym == TYildouble || tycomplex(tym))
                    break;

                code cs;
                cs.Iflags = 0;
                cs.Irex = 0;
                if (I32)
                {
                    assert(sz >= REGSIZE * 2);
                    loadea(cdb, e, &cs, 0xFF, 6, sz - REGSIZE, 0, 0); // PUSH EA+4
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    sz -= REGSIZE;

                    if (sz > REGSIZE)
                    {
                        // More than two words: push the rest by stepping
                        // the offset of the same addressing mode downwards
                        while (sz)
                        {
                            cs.IEV1.Voffset -= REGSIZE;
                            cdb.gen(&cs);                    // PUSH EA+...
                            cdb.genadjesp(REGSIZE);
                            stackpush += REGSIZE;
                            sz -= REGSIZE;
                        }
                        freenode(e);
                        return;
                    }
                }
                else
                {
                    if (sz == DOUBLESIZE)
                    {
                        loadea(cdb, e, &cs, 0xFF, 6, DOUBLESIZE - REGSIZE, 0, 0); // PUSH EA+6
                        cs.IEV1.Voffset -= REGSIZE;
                        cdb.gen(&cs);                    // PUSH EA+4
                        cdb.genadjesp(REGSIZE);
                        getlvalue_lsw(&cs);
                        cdb.gen(&cs);                    // PUSH EA+2
                    }
                    else /* TYlong */
                        loadea(cdb, e, &cs, 0xFF, 6, REGSIZE, 0, 0); // PUSH EA+2
                    cdb.genadjesp(REGSIZE);
                }
                stackpush += sz;
                getlvalue_lsw(&cs);
                cdb.gen(&cs);                            // PUSH EA
                cdb.genadjesp(REGSIZE);
                freenode(e);
                return;
            }
            break;

        case OPnp_fp:
            if (!e.Ecount)                         /* if (far *)e1 */
            {
                // Push the segment register, then recurse for the offset
                elem* e1 = e.EV.E1;
                tym_t tym1 = tybasic(e1.Ety);
                /* BUG: what about pointers to functions?   */
                int segreg;
                switch (tym1)
                {
                    case TYnptr: segreg = 3<<3; break;
                    case TYcptr: segreg = 1<<3; break;
                    default:     segreg = 2<<3; break;
                }
                if (I32 && stackalign == 2)
                    cdb.gen1(0x66);                 // push a word
                cdb.gen1(0x06 + segreg);            // PUSH SEGREG
                if (I32 && stackalign == 2)
                    code_orflag(cdb.last(), CFopsize);        // push a word
                cdb.genadjesp(stackalign);
                stackpush += stackalign;
                pushParams(cdb, e1, stackalign, tyf);
                freenode(e);
                return;
            }
            break;

        case OPrelconst:
            if (config.exe & EX_segmented)
            {
                /* Determine if we can just push the segment register           */
                /* Test size of type rather than TYfptr because of (long)(&v)   */
                Symbol* s = e.EV.Vsym;
                //if (sytab[s.Sclass] & SCSS && !I32)  // if variable is on stack
                //    needframe = true;                 // then we need stack frame
                int fl;
                if (_tysize[tym] == tysize(TYfptr) &&
                    (fl = s.Sfl) != FLfardata &&
                    /* not a function that CS might not be the segment of       */
                    (!((fl == FLfunc || s.ty() & mTYcs) &&
                      (s.Sclass == SCcomdat || s.Sclass == SCextern || s.Sclass == SCinline || config.wflags & WFthunk)) ||
                     (fl == FLfunc && config.exe == EX_DOSX)
                    )
                   )
                {
                    stackpush += sz;
                    cdb.gen1(0x06 +           // PUSH SEGREG
                            (((fl == FLfunc || s.ty() & mTYcs) ? 1 : segfl[fl]) << 3));
                    cdb.genadjesp(REGSIZE);

                    if (config.target_cpu >= TARGET_80286 && !e.Ecount)
                    {
                        getoffset(cdb, e, STACK);
                        freenode(e);
                        return;
                    }
                    else
                    {
                        regm_t retregs;
                        offsetinreg(cdb, e, &retregs);
                        const reg = findreg(retregs);
                        genpush(cdb,reg);             // PUSH reg
                        cdb.genadjesp(REGSIZE);
                    }
                    return;
                }
                if (config.target_cpu >= TARGET_80286 && !e.Ecount)
                {
                    stackpush += sz;
                    if (_tysize[tym] == tysize(TYfptr))
                    {
                        // PUSH SEG e
                        cdb.gencs(0x68,0,FLextern,s);
                        cdb.last().Iflags = CFseg;
                        cdb.genadjesp(REGSIZE);
                    }
                    getoffset(cdb, e, STACK);
                    freenode(e);
                    return;
                }
            }
            break;                              /* else must evaluate expression */

        case OPvar:
        L1:
            if (config.flags4 & CFG4speed &&
                (config.target_cpu >= TARGET_80486 &&
                 config.target_cpu <= TARGET_PentiumMMX) &&
                sz <= 2 * REGSIZE &&
                !tyfloating(tym))
            {   // Avoid PUSH MEM on the Pentium when optimizing for speed
                break;
            }
            else if (movOnly(e) || (tyxmmreg(tym) && config.fpxmmregs) || tyvector(tym))
                break;                  // no PUSH MEM
            else
            {
                int regsize = REGSIZE;
                uint flag = 0;
                if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 &&
                    !e.Ecount)
                {
                    regsize = 4;
                    flag |= CFopsize;
                }
                code cs;
                cs.Iflags = 0;
                cs.Irex = 0;
                // Push from memory, highest addressed chunk first
                loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0);    // PUSH EA+sz-regsize
                code_orflag(cdb.last(), flag);
                cdb.genadjesp(REGSIZE);
                stackpush += sz;
                while (cast(targ_int)(sz -= regsize) > 0)
                {
                    loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0);
                    code_orflag(cdb.last(), flag);
                    cdb.genadjesp(REGSIZE);
                }
                freenode(e);
                return;
            }

        case OPconst:
        {
            char pushi = 0;         // non-zero if PUSH immediate may be used
            uint flag = 0;
            int regsize = REGSIZE;

            if (tycomplex(tym))
                break;

            if (I64 && tyfloating(tym) && sz > 4 && boolres(e))
                // Can't push 64 bit non-zero args directly
                break;

            if (I32 && szb == 10)           // special case for long double constants
            {
                assert(sz == 12);
                targ_int value = e.EV.Vushort8[4];  // pick upper 2 bytes of Vldouble
                stackpush += sz;
                cdb.genadjesp(cast(int)sz);
                for (int i = 0; i < 3; ++i)
                {
                    reg_t reg;
                    if (reghasvalue(allregs, value, &reg))
                        cdb.gen1(0x50 + reg);           // PUSH reg
                    else
                        cdb.genc2(0x68,0,value);        // PUSH value
                    value = e.EV.Vulong4[i ^ 1];        // treat Vldouble as 2 element array of 32 bit uint
                }
                freenode(e);
                return;
            }

            assert(I64 || sz <= tysize(TYldouble));
            int i = cast(int)sz;
            if (!I16 && i == 2)
                flag = CFopsize;

            if (config.target_cpu >= TARGET_80286)
    //       && (e.Ecount == 0 || e.Ecount != e.Ecomsub))
            {
                pushi = 1;
                if (I16 && config.target_cpu >= TARGET_80386 && i >= 4)
                {
                    regsize = 4;
                    flag = CFopsize;
                }
            }
            else if (i == REGSIZE)
                break;

            stackpush += sz;
            cdb.genadjesp(cast(int)sz);
            // Three views of the same constant, indexed by chunk size
            targ_uns* pi = &e.EV.Vuns;               // point to start of Vdouble
            targ_ushort* ps = cast(targ_ushort *) pi;
            targ_ullong* pl = cast(targ_ullong *)pi;
            i /= regsize;
            do
            {
                if (i)                      /* be careful not to go negative */
                    i--;

                targ_size_t value;
                switch (regsize)
                {
                    case 2:
                        value = ps[i];
                        break;

                    case 4:
                        if (tym == TYldouble || tym == TYildouble)
                            /* The size is 10 bytes, and since we have 2 bytes left over,
                             * just read those 2 bytes, not 4.
                             * Otherwise we're reading uninitialized data.
                             * I.e. read 4 bytes, 4 bytes, then 2 bytes
                             */
                            value = i == 2 ? ps[4] : pi[i]; // 80 bits
                        else
                            value = pi[i];
                        break;

                    case 8:
                        value = cast(targ_size_t)pl[i];
                        break;

                    default:
                        assert(0);
                }

                reg_t reg;
                if (pushi)
                {
                    if (I64 && regsize == 8 && value != cast(int)value)
                    {
                        regwithvalue(cdb,allregs,value,&reg,64);
                        goto Preg;          // cannot push imm64 unless it is sign extended 32 bit value
                    }
                    if (regsize == REGSIZE && reghasvalue(allregs,value,&reg))
                        goto Preg;
                    cdb.genc2((szb == 1) ? 0x6A : 0x68, 0, value); // PUSH value
                }
                else
                {
                    regwithvalue(cdb, allregs, value, &reg, 0);
                Preg:
                    genpush(cdb,reg);         // PUSH reg
                }
                code_orflag(cdb.last(), flag);              // operand size
            } while (i);
            freenode(e);
            return;
        }

        case OPpair:
        {
            if (e.Ecount)
                break;
            const op1 = e.EV.E1.Eoper;
            const op2 = e.EV.E2.Eoper;
            if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) &&
                (op2 == OPvar || op2 == OPconst || op2 == OPrelconst))
            {
                // Both halves are simple: push E2 first, then E1
                pushParams(cdb, e.EV.E2, stackalign, tyf);
                pushParams(cdb, e.EV.E1, stackalign, tyf);
                freenode(e);
            }
            else if (tyfloating(e.EV.E1.Ety) ||
                     tyfloating(e.EV.E2.Ety))
            {
                // Need special handling because of order of evaluation of e1 and e2
                break;
            }
            else
            {
                regm_t regs = allregs;
                codelem(cdb, e, &regs, false);
                genpush(cdb, findregmsw(regs)); // PUSH msreg
                genpush(cdb, findreglsw(regs)); // PUSH lsreg
                cdb.genadjesp(cast(int)sz);
                stackpush += sz;
            }
            return;
        }

        case OPrpair:
        {
            if (e.Ecount)
                break;
            const op1 = e.EV.E1.Eoper;
            const op2 = e.EV.E2.Eoper;
            if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) &&
                (op2 == OPvar || op2 == OPconst || op2 == OPrelconst))
            {
                // Both halves are simple: push E1 first, then E2
                pushParams(cdb, e.EV.E1, stackalign, tyf);
                pushParams(cdb, e.EV.E2, stackalign, tyf);
                freenode(e);
            }
            else if (tyfloating(e.EV.E1.Ety) ||
                     tyfloating(e.EV.E2.Ety))
            {
                // Need special handling because of order of evaluation of e1 and e2
                break;
            }
            else
            {
                regm_t regs = allregs;
                codelem(cdb, e, &regs, false);
                genpush(cdb, findregmsw(regs)); // PUSH msreg
                genpush(cdb, findreglsw(regs)); // PUSH lsreg
                cdb.genadjesp(cast(int)sz);
                stackpush += sz;
            }
            return;
        }

        default:
            break;
    }

    // General case: evaluate the argument, then push the result
    regm_t retregs = tybyte(tym) ? BYTEREGS : allregs;
    if (tyvector(tym) || (tyxmmreg(tym) && config.fpxmmregs))
    {
        // Evaluate into an XMM register, make room, then store to [ESP]
        regm_t retxmm = XMMREGS;
        codelem(cdb, e, &retxmm, false);
        stackpush += sz;
        cdb.genadjesp(cast(int)sz);
        cod3_stackadj(cdb, cast(int)sz);
        const op = xmmstore(tym);
        const r = findreg(retxmm);
        cdb.gen2sib(op, modregxrm(0, r - XMM0,4 ), modregrm(0, 4, SP));   // MOV [ESP],r
        checkSetVex(cdb.last(),tym);
        return;
    }
    else if (tyfloating(tym))
    {
        if (config.inline8087)
        {
            // Evaluate onto the 8087 stack, make room, then FSTP to the stack
            retregs = tycomplex(tym) ? mST01 : mST0;
            codelem(cdb, e, &retregs, false);
            stackpush += sz;
            cdb.genadjesp(cast(int)sz);
            cod3_stackadj(cdb, cast(int)sz);
            opcode_t op;
            uint r;
            switch (tym)
            {
                case TYfloat:
                case TYifloat:
                case TYcfloat:
                    op = 0xD9;
                    r = 3;
                    break;

                case TYdouble:
                case TYidouble:
                case TYdouble_alias:
                case TYcdouble:
                    op = 0xDD;
                    r = 3;
                    break;

                case TYldouble:
                case TYildouble:
                case TYcldouble:
                    op = 0xDB;
                    r = 7;
                    break;

                default:
                    assert(0);
            }
            if (!I16)
            {
                if (tycomplex(tym))
                {
                    // FSTP sz/2[ESP]
                    cdb.genc1(op, (modregrm(0, 4, SP) << 8) | modregxrm(2, r, 4),FLconst, sz/2);
                    pop87();
                }
                pop87();
                cdb.gen2sib(op, modregrm(0, r, 4),modregrm(0, 4, SP));   // FSTP [ESP]
            }
            else
            {
                retregs = IDXREGS;                             // get an index reg
                reg_t reg;
                allocreg(cdb, &retregs, &reg, TYoffset);
                genregs(cdb, 0x89, SP, reg);         // MOV reg,SP
                pop87();
                cdb.gen2(op, modregrm(0, r, regtorm[reg]));       // FSTP [reg]
            }
            if (LARGEDATA)
                cdb.last().Iflags |= CFss;        // want to store into stack
            genfwait(cdb);         // FWAIT
            return;
        }
        else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
            retregs = mSTACK;
    }
    else if (I16 && sz == 8)             // if long long
        retregs = mSTACK;

    scodelem(cdb,e,&retregs,0,true);
    if (retregs != mSTACK)                // if stackpush not already inc'd
        stackpush += sz;
    if (sz <= REGSIZE)
    {
        genpush(cdb,findreg(retregs));        // PUSH reg
        cdb.genadjesp(cast(int)REGSIZE);
    }
    else if (sz == REGSIZE * 2)
    {
        genpush(cdb,findregmsw(retregs));     // PUSH msreg
        genpush(cdb,findreglsw(retregs));     // PUSH lsreg
        cdb.genadjesp(cast(int)sz);
    }
}

/*******************************
 * Get offset portion of e, and store it in an index
 * register.
Return mask of index register in *pretregs.
 *
 * If `e` is a common subexpression that is not yet fully consumed
 * (`e.Ecount` is non-zero and differs from `e.Ecomsub`), a register in
 * mLSW that is recorded in `regcon.cse` as holding `e` is reused.
 * Otherwise a register is allocated from mLSW and `getoffset()` is
 * called to fill it. In both cases `e` is then passed to `cssave()`
 * and freed with `freenode()`.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = elem whose offset portion is wanted; freed before returning
 *      pretregs = set to the mask of the register chosen
 */

void offsetinreg(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    regm_t retregs = mLSW;                     // want only offset
    if (e.Ecount && e.Ecount != e.Ecomsub)
    {
        // Candidate registers: hold a CSE value, are not marked in
        // cse.mops, and are not assigned to a register variable.
        regm_t rm = retregs & regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; /* possible regs */
        for (uint i = 0; rm; i++)
        {
            if (mask(i) & rm && regcon.cse.value[i] == e)
            {
                *pretregs = mask(i);
                getregs(cdb, *pretregs);
                goto L3;
            }
            rm &= ~mask(i);                    // register i examined, drop it
        }
    }

    *pretregs = retregs;
    allocreg(cdb, pretregs, &reg, TYoffset);
    getoffset(cdb,e,reg);
L3:
    cssave(e, *pretregs,false);
    freenode(e);
}

/******************************
 * Generate code to load data into registers.
 *
 * The work is split into three cases:
 *  1. `*pretregs == mPSW`: only the condition flags are wanted, so the
 *     value is compared/OR-ed against zero without keeping a result.
 *  2. `e` is an OPconst: the constant is materialized into the
 *     allocated register(s).
 *  3. otherwise: the operand is loaded through `loadea()`. This path
 *     reads `e.EV.Vsym`, so `e` is presumably always an OPvar here
 *     (TODO confirm against callers).
 *
 * TYstruct operands are forwarded to `cdrelconst()`; floating point
 * operands may be forwarded to `cloadxmm()`, `load87()` or `cload87()`.
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = elem to load
 *      pretregs = on entry, mask of acceptable destinations (may include
 *                 mPSW, mSTACK, mST0, XMM registers); nothing is
 *                 generated when it is 0. Updated via `fixresult()`.
 */


void loaddata(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    reg_t nreg;
    reg_t sreg;
    opcode_t op;
    tym_t tym;
    code cs;
    regm_t flags, forregs, regm;

    debug
    {
//        if (debugw)
//            printf("loaddata(e = %p,*pretregs = %s)\n",e,regm_str(*pretregs));
//        elem_print(e);
    }

    assert(e);
    elem_debug(e);
    if (*pretregs == 0)
        return;
    tym = tybasic(e.Ety);
    if (tym == TYstruct)
    {
        cdrelconst(cdb,e,pretregs);
        return;
    }
    if (tyfloating(tym))
    {
        objmod.fltused();
        if (config.fpxmmregs &&
            (tym == TYcfloat || tym == TYcdouble) &&
            (*pretregs & (XMMREGS | mPSW))
           )
        {
            cloadxmm(cdb, e, pretregs);
            return;
        }
        else if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                load87(cdb, e, 0, pretregs, null, -1);
                return;
            }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }
        }
    }
    int sz = _tysize[tym];
    cs.Iflags = 0;
    cs.Irex = 0;

    /* ---- Case 1: only the flags are wanted ---- */
    if (*pretregs == mPSW)
    {
        Symbol *s;
        regm = allregs;
        if (e.Eoper == OPconst)
        {       /* true:        OR SP,SP        (SP is never 0)         */
                /* false:       CMP SP,SP       (always equal)          */
            genregs(cdb, (boolres(e)) ? 0x09 : 0x39 , SP, SP);
            if (I64)
                code_orrex(cdb.last(), REX_W);
        }
        else if (e.Eoper == OPvar &&
            (s = e.EV.Vsym).Sfl == FLreg &&
            s.Sregm & XMMREGS &&
            (tym == TYfloat || tym == TYifloat || tym == TYdouble || tym ==TYidouble))
        {
            // Value already lives in an XMM register: test it in place
            tstresult(cdb,s.Sregm,e.Ety,true);
        }
        else if (sz <= REGSIZE)
        {
            if (!I16 && (tym == TYfloat || tym == TYifloat))
            {
                allocreg(cdb, &regm, &reg, TYoffset);        // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);     // MOV reg,data
                cdb.gen2(0xD1,modregrmx(3,4,reg));           // SHL reg,1
            }
            else if (I64 && (tym == TYdouble || tym ==TYidouble))
            {
                allocreg(cdb, &regm, &reg, TYoffset);        // get a register
                loadea(cdb, e,&cs, 0x8B, reg, 0, 0, 0);      // MOV reg,data
                // remove sign bit, so that -0.0 == 0.0
                cdb.gen2(0xD1, modregrmx(3, 4, reg));        // SHL reg,1
                code_orrex(cdb.last(), REX_W);
            }
            else if (TARGET_OSX && e.Eoper == OPvar && movOnly(e))
            {
                allocreg(cdb, &regm, &reg, TYoffset);        // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);     // MOV reg,data
                fixresult(cdb, e, regm, pretregs);
            }
            else
            {
                cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                op = (sz == 1) ? 0x80 : 0x81;
                loadea(cdb, e, &cs, op, 7, 0, 0, 0);         // CMP EA,0

                // Convert to TEST instruction if EA is a register
                // (to avoid register contention on Pentium)
                code *c = cdb.last();
                if ((c.Iop & ~1) == 0x38 &&
                    (c.Irm & modregrm(3, 0, 0)) == modregrm(3, 0, 0)
                   )
                {
                    c.Iop = (c.Iop & 1) | 0x84;
                    code_newreg(c, c.Irm & 7);
                    if (c.Irex & REX_B)
                        //c.Irex = (c.Irex & ~REX_B) | REX_R;
                        c.Irex |= REX_R;
                }
            }
        }
        else if (sz < 8)
        {
            allocreg(cdb, &regm, &reg, TYoffset);            // get a register
            if (I32)                                         // it's a 48 bit pointer
                loadea(cdb, e, &cs, MOVZXw, reg, REGSIZE, 0, 0); // MOVZX reg,data+4
            else
            {
                loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, 0, 0);   // MOV reg,data+2
                if (tym == TYfloat || tym == TYifloat)           // dump sign bit
                    cdb.gen2(0xD1, modregrm(3, 4, reg));         // SHL reg,1
            }
            loadea(cdb,e,&cs,0x0B,reg,0,regm,0);             // OR reg,data
        }
        else if (sz == 8 || (I64 && sz == 2 * REGSIZE && !tyfloating(tym)))
        {
            allocreg(cdb, &regm, &reg, TYoffset);            // get a register
            int i = sz - REGSIZE;
            loadea(cdb, e, &cs, 0x8B, reg, i, 0, 0);         // MOV reg,data+6
            if (tyfloating(tym))                             // TYdouble or TYdouble_alias
                cdb.gen2(0xD1, modregrm(3, 4, reg));         // SHL reg,1

            // OR the remaining words into reg, most significant first
            while ((i -= REGSIZE) >= 0)
            {
                loadea(cdb, e, &cs, 0x0B, reg, i, regm, 0);  // OR reg,data+i
                code *c = cdb.last();
                if (i == 0)
                    c.Iflags |= CFpsw;                       // need the flags on last OR
            }
        }
        else if (sz == tysize(TYldouble))                    // TYldouble
            load87(cdb, e, 0, pretregs, null, -1);
        else
        {
            elem_print(e);
            assert(0);
        }
        return;
    }

    /* not for flags only */
    flags = *pretregs & mPSW;             /* save original                */
    forregs = *pretregs & (mBP | ALLREGS | mES | XMMREGS);
    if (*pretregs & mSTACK)
        forregs |= DOUBLEREGS;

    /* ---- Case 2: constant operand ---- */
    if (e.Eoper == OPconst)
    {
        targ_size_t value = e.EV.Vint;
        if (sz == 8)
            value = cast(targ_size_t)e.EV.Vullong;

        // Prefer a register that already contains the constant
        if (sz == REGSIZE && reghasvalue(forregs, value, &reg))
            forregs = mask(reg);

        regm_t save = regcon.immed.mval;
        allocreg(cdb, &forregs, &reg, tym);     // allocate registers
        regcon.immed.mval = save;               // KLUDGE!
        if (sz <= REGSIZE)
        {
            // Low bits of flags are size hints handed to movregconst()/regwithvalue()
            if (sz == 1)
                flags |= 1;
            else if (!I16 && sz == SHORTSIZE &&
                     !(mask(reg) & regcon.mvar) &&
                     !(config.flags4 & CFG4speed)
                    )
                flags |= 2;
            if (sz == 8)
                flags |= 64;
            if (isXMMreg(reg))
            {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                 * in the data segment, because they are x87 opcodes.
                 * Not so efficient. We should at least do a PXOR for 0.
                 */
                reg_t r;
                targ_size_t unsvalue = e.EV.Vuns;
                if (sz == 8)
                    unsvalue = cast(targ_size_t)e.EV.Vullong;
                regwithvalue(cdb,ALLREGS, unsvalue,&r,flags);
                flags = 0;                          // flags are already set
                cdb.genfltreg(0x89, r, 0);          // MOV floatreg,r
                if (sz == 8)
                    code_orrex(cdb.last(), REX_W);
                assert(sz == 4 || sz == 8);         // float or double
                const opmv = xmmload(tym);
                cdb.genxmmreg(opmv, reg, 0, tym);   // MOVSS/MOVSD XMMreg,floatreg
            }
            else
            {
                movregconst(cdb, reg, value, flags);
                flags = 0;                          // flags are already set
            }
        }
        else if (sz < 8)        // far pointers, longs for 16 bit targets
        {
            targ_int msw = I32 ? e.EV.Vseg
                        : (e.EV.Vulong >> 16);
            targ_int lsw = e.EV.Voff;
            regm_t mswflags = 0;
            if (forregs & mES)
            {
                movregconst(cdb, reg, msw, 0);      // MOV reg,segment
                genregs(cdb, 0x8E, 0, reg);         // MOV ES,reg
                msw = lsw;                          // MOV reg,offset
            }
            else
            {
                sreg = findreglsw(forregs);
                movregconst(cdb, sreg, lsw, 0);
                reg = findregmsw(forregs);
                /* Decide if we need to set flags when we load msw      */
                if (flags && (msw && msw|lsw || !(msw|lsw)))
                {
                    mswflags = mPSW;
                    flags = 0;
                }
            }
            movregconst(cdb, reg, msw, mswflags);
        }
        else if (sz == 8)
        {
            if (I32)
            {
                targ_long *p = cast(targ_long *)cast(void*)&e.EV.Vdouble;
                if (isXMMreg(reg))
                {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                     * in the data segment, because they are x87 opcodes.
                     * Not so efficient. We should at least do a PXOR for 0.
                     */
                    reg_t r;
                    regm_t rm = ALLREGS;
                    allocreg(cdb, &rm, &r, TYint);      // allocate scratch register
                    movregconst(cdb, r, p[0], 0);
                    cdb.genfltreg(0x89, r, 0);          // MOV floatreg,r
                    movregconst(cdb, r, p[1], 0);
                    cdb.genfltreg(0x89, r, 4);          // MOV floatreg+4,r

                    const opmv = xmmload(tym);
                    cdb.genxmmreg(opmv, reg, 0, tym);   // MOVSS/MOVSD XMMreg,floatreg
                }
                else
                {
                    movregconst(cdb, findreglsw(forregs) ,p[0], 0);
                    movregconst(cdb, findregmsw(forregs) ,p[1], 0);
                }
            }
            else
            {
                targ_short *p = &e.EV.Vshort;  // point to start of Vdouble

                assert(reg == AX);
                movregconst(cdb, AX, p[3], 0);   // MOV AX,p[3]
                movregconst(cdb, DX, p[0], 0);
                movregconst(cdb, CX, p[1], 0);
                movregconst(cdb, BX, p[2], 0);
            }
        }
        else if (I64 && sz == 16)
        {
            movregconst(cdb, findreglsw(forregs), cast(targ_size_t)e.EV.Vcent.lsw, 64);
            movregconst(cdb, findregmsw(forregs), cast(targ_size_t)e.EV.Vcent.msw, 64);
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
    /* ---- Case 3: load from the operand's storage ---- */
    else
    {
        // See if we can use register that parameter was passed in
        if (regcon.params &&
            regParamInPreg(e.EV.Vsym) &&
            !anyiasm &&   // may have written to the memory for the parameter
            (regcon.params & mask(e.EV.Vsym.Spreg) && e.EV.Voffset == 0 ||
             regcon.params & mask(e.EV.Vsym.Spreg2) && e.EV.Voffset == REGSIZE) &&
            sz <= REGSIZE)                  // make sure no 'paint' to a larger size happened
        {
            reg = e.EV.Voffset ? e.EV.Vsym.Spreg2 : e.EV.Vsym.Spreg;
            forregs = mask(reg);

            if (debugr)
                printf("%s.%d is fastpar and using register %s\n",
                       e.EV.Vsym.Sident.ptr,
                       cast(int)e.EV.Voffset,
                       regm_str(forregs));

            mfuncreg &= ~forregs;
            regcon.used |= forregs;
            fixresult(cdb,e,forregs,pretregs);
            return;
        }

        allocreg(cdb, &forregs, &reg, tym);            // allocate registers

        if (sz == 1)
        {
            regm_t nregm;

            debug
            if (!(forregs & BYTEREGS))
            {
                elem_print(e);
                printf("forregs = %s\n", regm_str(forregs));
            }

            opcode_t opmv = 0x8A;                               // byte MOV
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                if (movOnly(e))
                    opmv = 0x8B;
            }
            assert(forregs & BYTEREGS);
            if (!I16)
            {
                // The MOVZX/MOVSX substitution below is disabled, so this
                // condition currently selects nothing.
                if (config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed &&
                    // Workaround for OSX linker bug:
                    //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                    !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
                   )
                {
//                    opmv = tyuns(tym) ? MOVZXb : MOVSXb;      // MOVZX/MOVSX
                }
                loadea(cdb, e, &cs, opmv, reg, 0, 0, 0);     // MOV regL,data
            }
            else
            {
                nregm = tyuns(tym) ? BYTEREGS : cast(regm_t) mAX;
                if (*pretregs & nregm)
                    nreg = reg;                             // already allocated
                else
                    allocreg(cdb, &nregm, &nreg, tym);
                loadea(cdb, e, &cs, opmv, nreg, 0, 0, 0);    // MOV nregL,data
                if (reg != nreg)
                {
                    genmovreg(cdb, reg, nreg);               // MOV reg,nreg
                    cssave(e, mask(nreg), false);
                }
            }
        }
        else if (forregs & XMMREGS)
        {
            // Can't load from registers directly to XMM regs
            //e.EV.Vsym.Sflags &= ~GTregcand;

            opcode_t opmv = xmmload(tym, xmmIsAligned(e));
            if (e.Eoper == OPvar)
            {
                Symbol *s = e.EV.Vsym;
                if (s.Sfl == FLreg && !(mask(s.Sreglsw) & XMMREGS))
                {
                    opmv = LODD;          // MOVD/MOVQ
                    /* getlvalue() will unwind this and unregister s; could use a better solution */
                }
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0); // MOVSS/MOVSD reg,data
            checkSetVex(cdb.last(),tym);
        }
        else if (sz <= REGSIZE)
        {
            opcode_t opmv = 0x8B;                     // MOV reg,data
            // The MOVZX/MOVSX substitution below is disabled, so this
            // condition currently selects nothing.
            if (sz == 2 && !I16 && config.target_cpu >= TARGET_PentiumPro &&
                // Workaround for OSX linker bug:
                //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
               )
            {
//                opmv = tyuns(tym) ? MOVZXw : MOVSXw;  // MOVZX/MOVSX
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0);
        }
        else if (sz <= 2 * REGSIZE && forregs & mES)
        {
            loadea(cdb, e, &cs, 0xC4, reg, 0, 0, mES);    // LES data
        }
        else if (sz <= 2 * REGSIZE)
        {
            if (I32 && sz == 8 &&
                (*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                // Pushing an 8 byte value straight onto the stack is not
                // supported on this path.
                assert(0);
            }

            reg = findregmsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, forregs, 0); // MOV reg,data+2
            if (I32 && sz == REGSIZE + 2)
                cdb.last().Iflags |= CFopsize;                   // seg is 16 bits
            reg = findreglsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, 0, forregs, 0);       // MOV reg,data
        }
        else if (sz >= 8)
        {
            assert(!I32);
            if ((*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                // Note that we allocreg(DOUBLEREGS) needlessly
                stackchanged = 1;
                int i = sz - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
            }
            else
            {
                assert(reg == AX);
                /* The 7th argument grows with each register already
                 * filled (0, mAX, mAX|mBX, ...), i.e. it appears to name
                 * the registers the next load must leave intact. The
                 * final load therefore has to list AX, BX and CX; it used
                 * to pass mAX|mCX|mCX, which left BX (data+4) unprotected.
                 */
                loadea(cdb, e, &cs, 0x8B, AX, 6, 0, 0);           // MOV AX,data+6
                loadea(cdb, e, &cs, 0x8B, BX, 4, mAX, 0);         // MOV BX,data+4
                loadea(cdb, e, &cs, 0x8B, CX, 2, mAX|mBX, 0);     // MOV CX,data+2
                loadea(cdb, e, &cs, 0x8B, DX, 0, mAX|mBX|mCX, 0); // MOV DX,data
            }
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
}

}