/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1984-1998 by Symantec
 *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod1.d, backend/cod1.d)
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod1.d
 */

module dmd.backend.cod1;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.rtlsym;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

extern (C++):

nothrow:

int REGSIZE();

extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl;

/// Returns: the single-bit mask `1 << m`
private extern (D) uint mask(uint m) { return 1 << m; }

/// Emit opcode 0x09 (x86 `OR r/m,reg`) through genregs(), passing `f` and `t`
/// in that order.
private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); }

/* array to convert from index register to r/m field   */
                                         /* AX  CX  DX  BX  SP  BP  SI  DI */
private __gshared const byte[8] regtorm32 = [  0,  1,  2,  3, -1,  5,  6,  7 ];
        __gshared const byte[8] regtorm   = [ -1, -1, -1,  7, -1,  6,  4,  5 ];

targ_size_t paramsize(elem *e, tym_t tyf);
//void funccall(ref CodeBuilder cdb,elem *e,uint numpara,uint numalign,
//        regm_t *pretregs,regm_t keepmsk, bool usefuncarg);
/*********************************
 * Decide whether parameter `s` stays in the register it arrived in,
 * or whether the register allocator should pick a register for it.
 * Params:
 *      s = parameter Symbol
 * Returns:
 *      `true` if `s` is a register parameter (SCfastpar or SCshadowreg)
 *      that should be left in its incoming register
 */
bool regParamInPreg(Symbol* s)
{
    //printf("regParamInPreg %s\n", s.Sident.ptr);
    const isRegParam = s.Sclass == SCfastpar || s.Sclass == SCshadowreg;
    if (!isRegParam)
        return false;

    // Only an optimized build with `s` flagged as a register candidate
    // hands the parameter over to the allocator.
    const allocatorTakesIt = (config.flags4 & CFG4optimized) && (s.Sflags & GTregcand);
    return !allocatorTakesIt;
}


/**************************
 * Determine if e is a 32 bit scaled index addressing mode.
 * Leading OPcomma nodes are skipped; the candidate must be a non-CSE
 * OPshl whose right operand is a constant no larger than 3.
 * Returns:
 *      0       not a scaled index addressing mode
 *      !=0     the value for ss in the SIB byte
 */

int isscaledindex(elem *e)
{
    assert(!I16);
    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
    }

    if (e.Eoper != OPshl || e.Ecount || e.EV.E2.Eoper != OPconst)
        return 0;

    const targ_uns shiftCount = e.EV.E2.EV.Vuns;
    return shiftCount <= 3 ? cast(int)shiftCount : 0;
}

/*********************************************
 * Generate code for which isscaledindex(e) returned a non-zero result.
 * Params:
 *      cdb      = code is appended here
 *      e        = the (possibly comma-prefixed) OPshl expression
 *      pidxregs = register mask handed to scodelem() for the shift's left operand
 *      keepmsk  = mask of registers forwarded to scodelem() as the keep mask
 */

/*private*/ void cdisscaledindex(ref CodeBuilder cdb,elem *e,regm_t *pidxregs,regm_t keepmsk)
{
    // Evaluate the left side of each comma with an empty result mask,
    // then step to its right side.
    for (; e.Eoper == OPcomma; e = e.EV.E2)
    {
        regm_t noResult = 0;
        scodelem(cdb, e.EV.E1, &noResult, keepmsk, true);
        freenode(e);
    }

    assert(e.Eoper == OPshl);

    // Load index register with result of e.EV.E1
    scodelem(cdb, e.EV.E1, pidxregs, keepmsk, true);
    freenode(e.EV.E2);
    freenode(e);
}

/***********************************
 * Determine index if we can do two LEA instructions as a multiply.
* Returns:
 *      0       can't do it
 */

enum
{
    SSFLnobp       = 1,       /// can't have EBP in relconst
    SSFLnobase1    = 2,       /// no base register for first LEA
    SSFLnobase     = 4,       /// no base register
    SSFLlea        = 8,       /// can do it in one LEA
}

/// One row of the multiply-by-LEA table: a multiplier (`product`) together
/// with the two SIB scale fields and the SSFLxxxx restrictions that go with it.
struct Ssindex
{
    targ_uns product;
    ubyte ss1;
    ubyte ss2;
    ubyte ssflags;       /// SSFLxxxx
}

private __gshared const Ssindex[21] ssindex_array =
[
    { 0, 0, 0 },               // [0] is a place holder

    { 3,  1, 0, SSFLnobp | SSFLlea },
    { 5,  2, 0, SSFLnobp | SSFLlea },
    { 9,  3, 0, SSFLnobp | SSFLlea },

    { 6,  1, 1, SSFLnobase },
    { 12, 1, 2, SSFLnobase },
    { 24, 1, 3, SSFLnobase },
    { 10, 2, 1, SSFLnobase },
    { 20, 2, 2, SSFLnobase },
    { 40, 2, 3, SSFLnobase },
    { 18, 3, 1, SSFLnobase },
    { 36, 3, 2, SSFLnobase },
    { 72, 3, 3, SSFLnobase },

    { 15, 2, 1, SSFLnobp },
    { 25, 2, 2, SSFLnobp },
    { 27, 3, 1, SSFLnobp },
    { 45, 3, 2, SSFLnobp },
    { 81, 3, 3, SSFLnobp },

    { 16, 3, 1, SSFLnobase1 | SSFLnobase },
    { 32, 3, 2, SSFLnobase1 | SSFLnobase },
    { 64, 3, 3, SSFLnobase1 | SSFLnobase },
];

/***********************************
 * Look up a multiplier in ssindex_array.
 * Params:
 *      op      = operator; for OPshl, `product` is a shift count and is
 *                converted to the multiplier `1 << product` first
 *      product = multiplier (or shift count when op is OPshl)
 * Returns:
 *      index into ssindex_array of the matching entry,
 *      0 if there is none
 */
int ssindex(OPER op,targ_uns product)
{
    if (op == OPshl)
    {
        /* `1 << product` is evaluated as a 32 bit int. A count of 32 or
         * more is out of range for that shift (x86 would silently wrap it,
         * e.g. 36 -> 4, falsely matching the entry for 16), and no table
         * entry is that large anyway.
         */
        if (product >= int.sizeof * 8)
            return 0;
        product = 1 << product;
    }
    for (size_t i = 1; i < ssindex_array.length; i++)
    {
        if (ssindex_array[i].product == product)
            return cast(int)i;
    }
    return 0;
}

/***************************************
 * Build an EA of the form disp[base][index*scale].
* Input:
 *      c       struct to fill in
 *      base    base register (-1 if none)
 *      index   index register (-1 if none)
 *      scale   scale factor - 1,2,4,8
 *      disp    displacement
 */

void buildEA(code *c,int base,int index,int scale,targ_size_t disp)
{
    ubyte modrm;
    ubyte sibByte = 0;
    ubyte rexBits = 0;

    if (I16)
    {
        // Rows are selected by base + 1, columns by index + 1;
        // 0x09 marks a combination the asserts below reject.
        //                                 -1   AX   CX   DX   BX   SP   BP   SI   DI
        static immutable ubyte[9][9] EA16rm =
        [
            [   0x06,0x09,0x09,0x09,0x87,0x09,0x86,0x84,0x85,   ],      // -1
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // AX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // CX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // DX
            [   0x87,0x09,0x09,0x09,0x09,0x09,0x09,0x80,0x81,   ],      // BX
            [   0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,0x09,   ],      // SP
            [   0x86,0x09,0x09,0x09,0x09,0x09,0x09,0x82,0x83,   ],      // BP
            [   0x84,0x09,0x09,0x09,0x80,0x09,0x82,0x09,0x09,   ],      // SI
            [   0x85,0x09,0x09,0x09,0x81,0x09,0x83,0x09,0x09,   ]       // DI
        ];

        assert(scale == 1);
        modrm = EA16rm[base + 1][index + 1];
        assert(modrm != 9);
    }
    else
    {
        assert(index != SP);

        // Translate the scale factor into the SIB ss field
        uint ss;
        switch (scale)
        {
            case 1:     ss = 0; break;
            case 2:     ss = 1; break;
            case 4:     ss = 2; break;
            case 8:     ss = 3; break;
            default:    assert(0);
        }

        const bool haveBase  = base != -1;
        const bool haveIndex = index != -1;

        if (haveBase && haveIndex)
        {
            modrm = modregrm(2, 0, 4);
            sibByte = modregrm(ss,index & 7,base & 7);
            if (index & 8)
                rexBits |= REX_X;
            if (base & 8)
                rexBits |= REX_B;
        }
        else if (haveBase)
        {
            if (base == SP)
            {
                modrm = modregrm(2, 0, 4);
                sibByte = modregrm(0, 4, SP);
            }
            else
            {
                modrm = modregrm(2, 0, base & 7);
                if (base & 8)
                {
                    rexBits |= REX_B;
                    if (base == R12)
                    {
                        modrm = modregrm(2, 0, 4);
                        sibByte = modregrm(0, 4, 4);
                    }
                }
            }
        }
        else if (haveIndex)
        {
            modrm = modregrm(0,0,4);
            sibByte = modregrm(ss,index & 7,5);
            if (index & 8)
                rexBits |= REX_X;
        }
        else
            modrm = modregrm(0,0,5);
    }

    c.Irm = modrm;
    c.Isib = sibByte;
    c.Irex = rexBits;
    c.IFL1 = FLconst;
    c.IEV1.Vuns = cast(targ_uns)disp;
}

/*********************************************
 * Build REX, modregrm and sib bytes.
 * Returns:
 *      packed value: modregrm byte in bits 0..7, sib byte (if one was
 *      generated) in bits 8..15, REX bits shifted left by 16
 */

uint buildModregrm(int mod, int reg, int rm)
{
    if (I16)
        return modregrm(mod, reg, rm);

    // An r/m whose low three bits equal SP, in a non-register mode,
    // is emitted with a SIB byte.
    const bool withSib = (rm & 7) == SP && mod != 3;
    uint packed = withSib
        ? (modregrm(0,4,SP) << 8) | modregrm(mod,reg & 7,4)
        : modregrm(mod,reg & 7,rm & 7);

    if (reg & 8)
        packed |= REX_R << 16;
    if (rm & 8)
        packed |= REX_B << 16;
    return packed;
}

/****************************************
 * Generate code for eecontext.
 * The generated sequence, terminated by an INT 3, is stored in
 * eecontext.EEcode.
 */

void genEEcode()
{
    CodeBuilder cdb;
    cdb.ctor();

    eecontext.EEin++;
    regcon.immed.mval = 0;

    regm_t resultRegs = 0;    //regmask(eecontext.EEelem.Ety);

    assert(EEStack.offset >= REGSIZE);
    cod3_stackadj(cdb, cast(int)(EEStack.offset - REGSIZE));
    cdb.gen1(0x50 + SI);                      // PUSH ESI
    cdb.genadjesp(cast(int)EEStack.offset);
    gencodelem(cdb, eecontext.EEelem, &resultRegs, false);

    code *sequence = cdb.finish();
    assignaddrc(sequence);
    pinholeopt(sequence,null);
    jmpaddr(sequence);
    eecontext.EEcode = gen1(sequence, 0xCC);  // INT 3

    eecontext.EEin--;
}


/********************************************
 * Gen a save/restore sequence for mask of registers.
* Params:
 *      regm = mask of registers to save
 *      cdbsave = save code appended here
 *      cdbrestore = restore code appended here
 * Returns:
 *      amount of stack consumed
 */

uint gensaverestore(regm_t regm,ref CodeBuilder cdbsave,ref CodeBuilder cdbrestore)
{
    //printf("gensaverestore2(%s)\n", regm_str(regm));
    regm &= mBP | mES | ALLREGS | XMMREGS | mST0 | mST01;
    if (!regm)
        return 0;

    uint stackused = 0;

    // restore[r] holds the restore code for register r, null if r is not saved
    code *[regm.sizeof * 8] restore;

    // Walk the mask from bit 0 upward, emitting save code as we go
    reg_t r = 0;
    for (; regm; regm >>= 1, r++)
    {
        if (!(regm & 1))
        {
            restore[r] = null;
            continue;
        }

        code *undo;
        if (r == ES && I16)
        {
            stackused += REGSIZE;
            cdbsave.gen1(0x06);                     // PUSH ES
            undo = gen1(null, 0x07);                // POP  ES
        }
        else if (r == ST0 || r == ST01)
        {
            CodeBuilder cdb;
            cdb.ctor();
            gensaverestore87(1 << r, cdbsave, cdb);
            undo = cdb.finish();
        }
        else if (r >= XMM0 || I64 || cgstate.funcarg.size)
        {
            uint idx;
            regsave.save(cdbsave, r, &idx);
            CodeBuilder cdb;
            cdb.ctor();
            regsave.restore(cdb, r, idx);
            undo = cdb.finish();
        }
        else
        {
            stackused += REGSIZE;
            cdbsave.gen1(0x50 + (r & 7));           // PUSH r
            undo = gen1(null, 0x58 + (r & 7));      // POP  r
            if (r & 8)
            {
                code_orrex(cdbsave.last(), REX_B);
                code_orrex(undo, REX_B);
            }
        }
        restore[r] = undo;
    }

    // Restores are appended in the reverse order of the saves
    while (r)
    {
        if (code *c = restore[--r])
            cdbrestore.append(c);
    }

    return stackused;
}


/****************************************
 * Clean parameters off stack.
* Input:
 *      numpara         amount to adjust stack pointer
 *      keepmsk         mask of registers to not destroy
 */

void genstackclean(ref CodeBuilder cdb,uint numpara,regm_t keepmsk)
{
    //dbg_printf("genstackclean(numpara = %d, stackclean = %d)\n",numpara,cgstate.stackclean);
    const bool cleanNow = cgstate.stackclean || STACKALIGN >= 16;
    if (!numpara || !cleanNow)
        return;

/+
    if (0 &&                                // won't work if operand of scodelem
        numpara == stackpush &&             // if this is all those pushed
        needframe &&                        // and there will be a BP
        !config.windows &&
        !(regcon.mvar & fregsaved)          // and no registers will be pushed
    )
        genregs(cdb,0x89,BP,SP);  // MOV SP,BP
    else
+/

    // When optimizing for space and exactly one register-sized slot must go,
    // a POP into a free scratch register is used instead of adjusting SP.
    regm_t scratchm = 0;
    if (numpara == REGSIZE && (config.flags4 & CFG4space))
        scratchm = ALLREGS & ~keepmsk & regcon.used & ~regcon.mvar;

    if (scratchm)
    {
        reg_t r;
        allocreg(cdb, &scratchm, &r, TYint);
        cdb.gen1(0x58 + r);           // POP r
    }
    else
        cod3_stackadj(cdb, -numpara);

    stackpush -= numpara;
    cdb.genadjesp(-numpara);
}

/*********************************
 * Generate code for a logical expression.
* Input:
 *      e       elem
 *      jcond
 *         bit 1 if true then goto jump address if e
 *               if false then goto jump address if !e
 *             2 don't call save87()
 *      fltarg  FLcode or FLblock, flavor of target if e evaluates to jcond
 *      targ    either code or block pointer to destination
 */

void logexp(ref CodeBuilder cdb, elem *e, int jcond, uint fltarg, code *targ)
{
    //printf("logexp(e = %p, jcond = %d)\n", e, jcond);
    int no87 = (jcond & 2) == 0;
    docommas(cdb, &e);             // scan down commas
    cgstate.stackclean++;          // balanced by a decrement on every exit path

    if (!OTleaf(e.Eoper) && !e.Ecount)     // if operator and not common sub
    {
        switch (e.Eoper)
        {
            case OPoror:
            {
                con_t regconsave;
                if (jcond & 1)
                {
                    // jump-if-true: either operand being true takes the jump
                    logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                }
                else
                {
                    // jump-if-false: a true E1 skips over the test of E2
                    code *cnop = gennop(null);
                    logexp(cdb, e.EV.E1, jcond | 1, FLcode, cnop);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                    cdb.append(cnop);
                }
                andregcon(&regconsave);
                freenode(e);
                cgstate.stackclean--;
                return;
            }

            case OPandand:
            {
                con_t regconsave;
                if (jcond & 1)
                {
                    // jump-if-true: a false E1 skips over the test of E2
                    code *cnop = gennop(null);    // a dummy target address
                    logexp(cdb, e.EV.E1, jcond & ~1, FLcode, cnop);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                    cdb.append(cnop);
                }
                else
                {
                    // jump-if-false: either operand being false takes the jump
                    logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                    regconsave = regcon;
                    logexp(cdb, e.EV.E2, jcond, fltarg, targ);
                }
                andregcon(&regconsave);
                freenode(e);
                cgstate.stackclean--;
                return;
            }

            case OPnot:
                jcond ^= 1;             // invert the sense, then test the operand
                goto case OPbool;

            case OPbool:
            case OPs8_16:
            case OPu8_16:
            case OPs16_32:
            case OPu16_32:
            case OPs32_64:
            case OPu32_64:
            case OPu32_d:
            case OPd_ld:
                // these are handled by testing the operand directly
                logexp(cdb, e.EV.E1, jcond, fltarg, targ);
                freenode(e);
                cgstate.stackclean--;
                return;

            case OPcond:
            {
                code *cnop2 = gennop(null);   // addresses of start of leaves
                code *cnop = gennop(null);
                logexp(cdb, e.EV.E1, false, FLcode, cnop2);   // eval condition
                con_t regconold = regcon;
                logexp(cdb, e.EV.E2.EV.E1, jcond, fltarg, targ);
                genjmp(cdb, JMP, FLcode, cast(block *) cnop); // skip second leaf

                con_t regconsave = regcon;
                regcon = regconold;

                cdb.append(cnop2);
                logexp(cdb, e.EV.E2.EV.E2, jcond, fltarg, targ);
                andregcon(&regconold);
                andregcon(&regconsave);
                freenode(e.EV.E2);
                freenode(e);
                cdb.append(cnop);
                cgstate.stackclean--;
                return;
            }

            default:
                break;
        }
    }

    /* Special code for signed long compare.
     * Not necessary for I64 until we do cents.
     */
    if (OTrel2(e.Eoper) &&               // if < <= >= >
        !e.Ecount &&
        ( (I16 && tybasic(e.EV.E1.Ety) == TYlong  && tybasic(e.EV.E2.Ety) == TYlong) ||
          (I32 && tybasic(e.EV.E1.Ety) == TYllong && tybasic(e.EV.E2.Ety) == TYllong))
       )
    {
        // NOTE(review): `jcond != 0` is also true when only bit 2 is set;
        // confirm against longcmp() that this is intended.
        longcmp(cdb, e, jcond != 0, fltarg, targ);
        cgstate.stackclean--;
        return;
    }

    regm_t retregs = mPSW;                // return result in flags
    opcode_t op = jmpopcode(e);           // get jump opcode
    if (!(jcond & 1))
        op ^= 0x101;                      // toggle jump condition(s)
    codelem(cdb, e, &retregs, true);      // evaluate elem
    if (no87)
        cse_flush(cdb,no87);              // flush CSE's to memory
    genjmp(cdb, op, fltarg, cast(block *) targ);    // generate jmp instruction
    cgstate.stackclean--;
}

/******************************
 * Routine to aid in setting things up for gen().
 * Look for common subexpression.
 * Can handle indirection operators, but not if they're common subs.
* Input:
 *      e       -> elem where we get some of the data from
 *      cs      -> partially filled code to add
 *      op      =  opcode
 *      reg     =  reg field of (mod reg r/m)
 *      offset  =  data to be added to Voffset field
 *      keepmsk =  mask of registers we must not destroy
 *      desmsk  =  mask of registers destroyed by executing the instruction
 * Output:
 *      the completed instruction *cs is appended to cdb
 *      (the function itself returns nothing)
 */

void loadea(ref CodeBuilder cdb,elem *e,code *cs,uint op,uint reg,targ_size_t offset,
        regm_t keepmsk,regm_t desmsk)
{
    code* c, cg, cd;

debug
    if (debugw)
        printf("loadea: e=%p cs=%p op=x%x reg=%s offset=%lld keepmsk=%s desmsk=%s\n",
               e, cs, op, regstring[reg], cast(ulong)offset, regm_str(keepmsk), regm_str(desmsk));
    assert(e);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.Iop = op;
    tym_t tym = e.Ety;
    int sz = tysize(tym);

    /* Determine if location we want to get is in a register. If so,      */
    /* substitute the register for the EA.                                */
    /* Note that operators don't go through this. CSE'd operators are     */
    /* picked up by comsub().                                             */
    if (e.Ecount &&                     /* if cse                       */
        e.Ecount != e.Ecomsub &&        /* and cse was generated        */
        op != LEA && op != 0xC4 &&      /* and not an LEA or LES        */
        (op != 0xFF || reg != 3) &&     /* and not CALLF MEM16          */
        (op & 0xFFF8) != 0xD8)          // and not 8087 opcode
    {
        assert(OTleaf(e.Eoper));                /* can't handle this            */
        regm_t rm = regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; // possible regs
        if (op == 0xFF && reg == 6)
            rm &= ~XMMREGS;             // can't PUSH an XMM register
        if (sz > REGSIZE)               // value is in 2 or 4 registers
        {
            if (I16 && sz == 8)     // value is in 4 registers
            {
                // offset / 2 selects which of the four registers to look in
                static immutable regm_t[4] rmask = [ mDX,mCX,mBX,mAX ];
                rm &= rmask[cast(size_t)(offset >> 1)];
            }
            else if (offset)
                rm &= mMSW;             /* only high words      */
            else
                rm &= mLSW;             /* only low words       */
        }
        // Scan the candidate registers from the lowest numbered one up
        for (uint i = 0; rm; i++)
        {
            if (mask(i) & rm)
            {
                if (regcon.cse.value[i] == e && // if register has elem
                    /* watch out for a CWD destroying DX        */
                    !(i == DX && op == 0xF7 && desmsk & mDX))
                {
                    /* if ES, then it can only be a load    */
                    if (i == ES)
                    {
                        if (op != 0x8B)
                            break;      // not a load
                        cs.Iop = 0x8C;  /* MOV reg,ES   */
                        cs.Irm = modregrm(3, 0, reg & 7);
                        if (reg & 8)
                            code_orrex(cs, REX_B);
                    }
                    else    // XXX reg,i
                    {
                        cs.Irm = modregrm(3, reg & 7, i & 7);
                        if (reg & 8)
                            cs.Irex |= REX_R;
                        if (i & 8)
                            cs.Irex |= REX_B;
                        if (sz == 1 && I64 && (i >= 4 || reg >= 4))
                            cs.Irex |= REX;
                        if (I64 && (sz == 8 || sz == 16))
                            cs.Irex |= REX_W;
                    }
                    // register operand is set up; skip the memory EA path
                    goto L2;
                }
                rm &= ~mask(i);
            }
        }
    }

    // Not found in a register: build a memory addressing mode for e
    getlvalue(cdb, cs, e, keepmsk);
    if (offset == REGSIZE)
        getlvalue_msw(cs);
    else
        cs.IEV1.Voffset += offset;
    if (I64)
    {
        if (reg >= 4 && sz == 1)               // if byte register
            // Can only address those 8 bit registers if a REX byte is present
            cs.Irex |= REX;
        if ((op & 0xFFFFFFF8) == 0xD8)
            cs.Irex &= ~REX_W;                 // not needed for x87 ops
        if (mask(reg) & XMMREGS &&
            (op == LODSD || op == STOSD))
            cs.Irex &= ~REX_W;                 // not needed for xmm ops
    }
    code_newreg(cs, reg);                         // OR in reg field
    if (!I16)
    {
        if (reg == 6 && op == 0xFF ||             /* don't PUSH a word    */
            op == 0x0FB7 || op == 0x0FBF ||       /* MOVZX/MOVSX          */
            (op & 0xFFF8) == 0xD8 ||              /* 8087 instructions    */
            op == LEA)                            /* LEA                  */
        {
            cs.Iflags &= ~CFopsize;
            if (reg == 6 && op == 0xFF)         // if PUSH
                cs.Irex &= ~REX_W;             // REX is ignored for PUSH anyway
        }
    }
    else if ((op & 0xFFF8) == 0xD8 && ADDFWAIT())
        cs.Iflags |= CFwait;
L2:
    getregs(cdb, desmsk);                  // save any regs we destroy

    /* KLUDGE!  fix up DX for divide instructions */
    if (op == 0xF7 && desmsk == (mAX|mDX))        /* if we need to fix DX */
    {
        if (reg == 7)                           /* if IDIV              */
        {
            cdb.gen1(0x99);                     // CWD
            if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
        }
        else if (reg == 6)                      // if DIV
            genregs(cdb, 0x33, DX, DX);        // XOR DX,DX
    }

    // Eliminate MOV reg,reg
    if ((cs.Iop & ~3) == 0x88 &&
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        uint r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == reg)
            cs.Iop = NOP;
    }

    // Eliminate MOV xmmreg,xmmreg
    if ((cs.Iop & ~(LODSD ^ STOSS)) == LODSD &&    // detect LODSD, LODSS, STOSD, STOSS
        (cs.Irm & 0xC7) == modregrm(3,0,reg & 7))
    {
        reg_t r = cs.Irm & 7;
        if (cs.Irex & REX_B)
            r |= 8;
        if (r == (reg - XMM0))
            cs.Iop = NOP;
    }

    cdb.gen(cs);
}


/**************************
 * Get addressing mode.
 * Params:
 *      idxregs = mask of the register(s) to address through
 * Returns:
 *      the mod r/m byte in bits 0..7; for the R12 case a sib byte in
 *      bits 8..15 and REX_B in the bits above 16 (this is the layout
 *      setaddrmode() unpacks)
 */

uint getaddrmode(regm_t idxregs)
{
    uint mode;

    if (I16)
    {
        // reached only when none of BX, DI, SI is in idxregs
        static ubyte error() { assert(0); }

        mode =  (idxregs & mBX) ? modregrm(2,0,7) :     /* [BX] */
                (idxregs & mDI) ? modregrm(2,0,5):      /* [DI] */
                (idxregs & mSI) ? modregrm(2,0,4):      /* [SI] */
                                  error();
    }
    else
    {
        const reg = findreg(idxregs & (ALLREGS | mBP));
        if (reg == R12)
            // R12 is encoded with r/m = 4 plus an explicit sib byte
            mode = (REX_B << 16) | (modregrm(0,4,4) << 8) | modregrm(2,0,4);
        else
            mode = modregrmx(2,0,reg);
    }
    return mode;
}

/**************************
 * Store the addressing mode computed by getaddrmode(idxregs) into c:
 * bits 0..7 go to Irm, bits 8..15 to Isib, and the bits from 16 up are
 * OR'd into Irex after REX_B has been cleared.
 */
void setaddrmode(code *c, regm_t idxregs)
{
    uint mode = getaddrmode(idxregs);
    c.Irm = mode & 0xFF;
    c.Isib = (mode >> 8) & 0xFF;
    c.Irex &= ~REX_B;
    c.Irex |= mode >> 16;
}

/**********************************************
 * Redirect the operand of c to the most significant word:
 * for an FLreg operand the r/m register is replaced by the symbol's
 * Sregmsw (REX_B set or cleared to match), otherwise REGSIZE is added
 * to the offset.
 */

void getlvalue_msw(code *c)
{
    if (c.IFL1 == FLreg)
    {
        const regmsw = c.IEV1.Vsym.Sregmsw;
        c.Irm = (c.Irm & ~7) | (regmsw & 7);
        if (regmsw & 8)
            c.Irex |= REX_B;
        else
            c.Irex &= ~REX_B;
    }
    else
        c.IEV1.Voffset += REGSIZE;
}

/**********************************************
 * Redirect the operand of c to the least significant word:
 * for an FLreg operand the r/m register is replaced by the symbol's
 * Sreglsw (REX_B set or cleared to match), otherwise REGSIZE is
 * subtracted from the offset.
 */

void getlvalue_lsw(code *c)
{
    if (c.IFL1 == FLreg)
    {
        const reglsw = c.IEV1.Vsym.Sreglsw;
        c.Irm = (c.Irm & ~7) | (reglsw & 7);
        if (reglsw & 8)
            c.Irex |= REX_B;
        else
            c.Irex &= ~REX_B;
    }
    else
        c.IEV1.Voffset -= REGSIZE;
}

/******************
 * Compute addressing mode.
 * Generate & return sequence of code (if any).
 * Return in cs the info on it.
845 * Input: 846 * pcs -> where to store data about addressing mode 847 * e -> the lvalue elem 848 * keepmsk mask of registers we must not destroy or use 849 * if (keepmsk & RMstore), this will be only a store operation 850 * into the lvalue 851 * if (keepmsk & RMload), this will be a read operation only 852 */ 853 854 void getlvalue(ref CodeBuilder cdb,code *pcs,elem *e,regm_t keepmsk) 855 { 856 uint fl, f, opsave; 857 elem* e1, e11, e12; 858 bool e1isadd, e1free; 859 reg_t reg; 860 tym_t e1ty; 861 Symbol* s; 862 863 //printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); 864 //elem_print(e); 865 assert(e); 866 elem_debug(e); 867 if (e.Eoper == OPvar || e.Eoper == OPrelconst) 868 { 869 s = e.EV.Vsym; 870 fl = s.Sfl; 871 if (tyfloating(s.ty())) 872 objmod.fltused(); 873 } 874 else 875 fl = FLoper; 876 pcs.IFL1 = cast(ubyte)fl; 877 pcs.Iflags = CFoff; /* only want offsets */ 878 pcs.Irex = 0; 879 pcs.IEV1.Voffset = 0; 880 881 tym_t ty = e.Ety; 882 uint sz = tysize(ty); 883 if (tyfloating(ty)) 884 objmod.fltused(); 885 if (I64 && (sz == 8 || sz == 16) && !tyvector(ty)) 886 pcs.Irex |= REX_W; 887 if (!I16 && sz == SHORTSIZE) 888 pcs.Iflags |= CFopsize; 889 if (ty & mTYvolatile) 890 pcs.Iflags |= CFvolatile; 891 892 switch (fl) 893 { 894 case FLoper: 895 debug 896 if (debugw) printf("getlvalue(e = %p, keepmsk = %s)\n", e, regm_str(keepmsk)); 897 898 switch (e.Eoper) 899 { 900 case OPadd: // this way when we want to do LEA 901 e1 = e; 902 e1free = false; 903 e1isadd = true; 904 break; 905 906 case OPind: 907 case OPpostinc: // when doing (*p++ = ...) 908 case OPpostdec: // when doing (*p-- = ...) 
909 case OPbt: 910 case OPbtc: 911 case OPbtr: 912 case OPbts: 913 case OPvecfill: 914 e1 = e.EV.E1; 915 e1free = true; 916 e1isadd = e1.Eoper == OPadd; 917 break; 918 919 default: 920 printf("function: %s\n", funcsym_p.Sident.ptr); 921 elem_print(e); 922 assert(0); 923 } 924 e1ty = tybasic(e1.Ety); 925 if (e1isadd) 926 { 927 e12 = e1.EV.E2; 928 e11 = e1.EV.E1; 929 } 930 931 /* First see if we can replace *(e+&v) with 932 * MOV idxreg,e 933 * EA = [ES:] &v+idxreg 934 */ 935 f = FLconst; 936 937 /* Is address of `s` relative to RIP ? 938 */ 939 static bool relativeToRIP(Symbol* s) 940 { 941 if (!I64) 942 return false; 943 if (config.exe == EX_WIN64) 944 return true; 945 if (config.flags3 & CFG3pie) 946 { 947 if (s.Sfl == FLtlsdata || s.ty() & mTYthread) 948 { 949 if (s.Sclass == SCglobal || s.Sclass == SCstatic || s.Sclass == SClocstat) 950 return false; 951 } 952 return true; 953 } 954 else 955 return (config.flags3 & CFG3pic) != 0; 956 } 957 958 if (e1isadd && 959 ((e12.Eoper == OPrelconst && 960 !relativeToRIP(e12.EV.Vsym) && 961 (f = el_fl(e12)) != FLfardata 962 ) || 963 (e12.Eoper == OPconst && !I16 && !e1.Ecount && (!I64 || el_signx32(e12)))) && 964 e1.Ecount == e1.Ecomsub && 965 (!e1.Ecount || (~keepmsk & ALLREGS & mMSW) || (e1ty != TYfptr && e1ty != TYhptr)) && 966 tysize(e11.Ety) == REGSIZE 967 ) 968 { 969 uint t; /* component of r/m field */ 970 int ss; 971 int ssi; 972 973 if (e12.Eoper == OPrelconst) 974 f = el_fl(e12); 975 /*assert(datafl[f]);*/ /* what if addr of func? */ 976 if (!I16) 977 { /* Any register can be an index register */ 978 regm_t idxregs = allregs & ~keepmsk; 979 assert(idxregs); 980 981 /* See if e1.EV.E1 can be a scaled index */ 982 ss = isscaledindex(e11); 983 if (ss) 984 { 985 /* Load index register with result of e11.EV.E1 */ 986 cdisscaledindex(cdb, e11, &idxregs, keepmsk); 987 reg = findreg(idxregs); 988 { 989 t = stackfl[f] ? 
2 : 0; 990 pcs.Irm = modregrm(t, 0, 4); 991 pcs.Isib = modregrm(ss, reg & 7, 5); 992 if (reg & 8) 993 pcs.Irex |= REX_X; 994 } 995 } 996 else if ((e11.Eoper == OPmul || e11.Eoper == OPshl) && 997 !e11.Ecount && 998 e11.EV.E2.Eoper == OPconst && 999 (ssi = ssindex(e11.Eoper, e11.EV.E2.EV.Vuns)) != 0 1000 ) 1001 { 1002 regm_t scratchm; 1003 1004 char ssflags = ssindex_array[ssi].ssflags; 1005 if (ssflags & SSFLnobp && stackfl[f]) 1006 goto L6; 1007 1008 // Load index register with result of e11.EV.E1 1009 scodelem(cdb, e11.EV.E1, &idxregs, keepmsk, true); 1010 reg = findreg(idxregs); 1011 1012 int ss1 = ssindex_array[ssi].ss1; 1013 if (ssflags & SSFLlea) 1014 { 1015 assert(!stackfl[f]); 1016 pcs.Irm = modregrm(2,0,4); 1017 pcs.Isib = modregrm(ss1, reg & 7, reg & 7); 1018 if (reg & 8) 1019 pcs.Irex |= REX_X | REX_B; 1020 } 1021 else 1022 { 1023 int rbase; 1024 reg_t r; 1025 1026 scratchm = ALLREGS & ~keepmsk; 1027 allocreg(cdb, &scratchm, &r, TYint); 1028 1029 if (ssflags & SSFLnobase1) 1030 { 1031 t = 0; 1032 rbase = 5; 1033 } 1034 else 1035 { 1036 t = 0; 1037 rbase = reg; 1038 if (rbase == BP || rbase == R13) 1039 { 1040 static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1]; 1041 1042 // IMUL r,BP,imm32 1043 cdb.genc2(0x69, modregxrmx(3, r, rbase), imm32[ss1]); 1044 goto L7; 1045 } 1046 } 1047 1048 cdb.gen2sib(LEA, modregxrm(t, r, 4), modregrm(ss1, reg & 7 ,rbase & 7)); 1049 if (reg & 8) 1050 code_orrex(cdb.last(), REX_X); 1051 if (rbase & 8) 1052 code_orrex(cdb.last(), REX_B); 1053 if (I64) 1054 code_orrex(cdb.last(), REX_W); 1055 1056 if (ssflags & SSFLnobase1) 1057 { 1058 cdb.last().IFL1 = FLconst; 1059 cdb.last().IEV1.Vuns = 0; 1060 } 1061 L7: 1062 if (ssflags & SSFLnobase) 1063 { 1064 t = stackfl[f] ? 
2 : 0; 1065 rbase = 5; 1066 } 1067 else 1068 { 1069 t = 2; 1070 rbase = r; 1071 assert(rbase != BP); 1072 } 1073 pcs.Irm = modregrm(t, 0, 4); 1074 pcs.Isib = modregrm(ssindex_array[ssi].ss2, r & 7, rbase & 7); 1075 if (r & 8) 1076 pcs.Irex |= REX_X; 1077 if (rbase & 8) 1078 pcs.Irex |= REX_B; 1079 } 1080 freenode(e11.EV.E2); 1081 freenode(e11); 1082 } 1083 else 1084 { 1085 L6: 1086 /* Load index register with result of e11 */ 1087 scodelem(cdb, e11, &idxregs, keepmsk, true); 1088 setaddrmode(pcs, idxregs); 1089 if (stackfl[f]) /* if we need [EBP] too */ 1090 { 1091 uint idx = pcs.Irm & 7; 1092 if (pcs.Irex & REX_B) 1093 pcs.Irex = (pcs.Irex & ~REX_B) | REX_X; 1094 pcs.Isib = modregrm(0, idx, BP); 1095 pcs.Irm = modregrm(2, 0, 4); 1096 } 1097 } 1098 } 1099 else 1100 { 1101 regm_t idxregs = IDXREGS & ~keepmsk; /* only these can be index regs */ 1102 assert(idxregs); 1103 if (stackfl[f]) /* if stack data type */ 1104 { 1105 idxregs &= mSI | mDI; /* BX can't index off stack */ 1106 if (!idxregs) goto L1; /* index regs aren't avail */ 1107 t = 6; /* [BP+SI+disp] */ 1108 } 1109 else 1110 t = 0; /* [SI + disp] */ 1111 scodelem(cdb, e11, &idxregs, keepmsk, true); // load idx reg 1112 pcs.Irm = cast(ubyte)(getaddrmode(idxregs) ^ t); 1113 } 1114 if (f == FLpara) 1115 refparam = true; 1116 else if (f == FLauto || f == FLbprel || f == FLfltreg || f == FLfast) 1117 reflocal = true; 1118 else if (f == FLcsdata || tybasic(e12.Ety) == TYcptr) 1119 pcs.Iflags |= CFcs; 1120 else 1121 assert(f != FLreg); 1122 pcs.IFL1 = cast(ubyte)f; 1123 if (f != FLconst) 1124 pcs.IEV1.Vsym = e12.EV.Vsym; 1125 pcs.IEV1.Voffset = e12.EV.Voffset; /* += ??? 
*/ 1126 1127 /* If e1 is a CSE, we must generate an addressing mode */ 1128 /* but also leave EA in registers so others can use it */ 1129 if (e1.Ecount) 1130 { 1131 uint flagsave; 1132 1133 regm_t idxregs = IDXREGS & ~keepmsk; 1134 allocreg(cdb, &idxregs, ®, TYoffset); 1135 1136 /* If desired result is a far pointer, we'll have */ 1137 /* to load another register with the segment of v */ 1138 if (e1ty == TYfptr) 1139 { 1140 reg_t msreg; 1141 1142 idxregs |= mMSW & ALLREGS & ~keepmsk; 1143 allocreg(cdb, &idxregs, &msreg, TYfptr); 1144 msreg = findregmsw(idxregs); 1145 /* MOV msreg,segreg */ 1146 genregs(cdb, 0x8C, segfl[f], msreg); 1147 } 1148 opsave = pcs.Iop; 1149 flagsave = pcs.Iflags; 1150 ubyte rexsave = pcs.Irex; 1151 pcs.Iop = LEA; 1152 code_newreg(pcs, reg); 1153 if (!I16) 1154 pcs.Iflags &= ~CFopsize; 1155 if (I64) 1156 pcs.Irex |= REX_W; 1157 cdb.gen(pcs); // LEA idxreg,EA 1158 cssave(e1,idxregs,true); 1159 if (!I16) 1160 { 1161 pcs.Iflags = flagsave; 1162 pcs.Irex = rexsave; 1163 } 1164 if (stackfl[f] && (config.wflags & WFssneds)) // if pointer into stack 1165 pcs.Iflags |= CFss; // add SS: override 1166 pcs.Iop = opsave; 1167 pcs.IFL1 = FLoffset; 1168 pcs.IEV1.Vuns = 0; 1169 setaddrmode(pcs, idxregs); 1170 } 1171 freenode(e12); 1172 if (e1free) 1173 freenode(e1); 1174 goto Lptr; 1175 } 1176 1177 L1: 1178 1179 /* The rest of the cases could be a far pointer */ 1180 1181 regm_t idxregs; 1182 idxregs = (I16 ? 
IDXREGS : allregs) & ~keepmsk; // only these can be index regs 1183 assert(idxregs); 1184 if (!I16 && 1185 (sz == REGSIZE || (I64 && sz == 4)) && 1186 keepmsk & RMstore) 1187 idxregs |= regcon.mvar; 1188 1189 switch (e1ty) 1190 { 1191 case TYfptr: /* if far pointer */ 1192 case TYhptr: 1193 idxregs = (mES | IDXREGS) & ~keepmsk; // need segment too 1194 assert(idxregs & mES); 1195 pcs.Iflags |= CFes; /* ES segment override */ 1196 break; 1197 1198 case TYsptr: /* if pointer to stack */ 1199 if (config.wflags & WFssneds) // if SS != DS 1200 pcs.Iflags |= CFss; /* then need SS: override */ 1201 break; 1202 1203 case TYfgPtr: 1204 if (I32) 1205 pcs.Iflags |= CFgs; 1206 else if (I64) 1207 pcs.Iflags |= CFfs; 1208 else 1209 assert(0); 1210 break; 1211 1212 case TYcptr: /* if pointer to code */ 1213 pcs.Iflags |= CFcs; /* then need CS: override */ 1214 break; 1215 1216 default: 1217 break; 1218 } 1219 pcs.IFL1 = FLoffset; 1220 pcs.IEV1.Vuns = 0; 1221 1222 /* see if we can replace *(e+c) with 1223 * MOV idxreg,e 1224 * [MOV ES,segment] 1225 * EA = [ES:]c[idxreg] 1226 */ 1227 if (e1isadd && e12.Eoper == OPconst && 1228 (!I64 || el_signx32(e12)) && 1229 (tysize(e12.Ety) == REGSIZE || (I64 && tysize(e12.Ety) == 4)) && 1230 (!e1.Ecount || !e1free) 1231 ) 1232 { 1233 int ss; 1234 1235 pcs.IEV1.Vuns = e12.EV.Vuns; 1236 freenode(e12); 1237 if (e1free) freenode(e1); 1238 if (!I16 && e11.Eoper == OPadd && !e11.Ecount && 1239 tysize(e11.Ety) == REGSIZE) 1240 { 1241 e12 = e11.EV.E2; 1242 e11 = e11.EV.E1; 1243 e1 = e1.EV.E1; 1244 e1free = true; 1245 goto L4; 1246 } 1247 if (!I16 && (ss = isscaledindex(e11)) != 0) 1248 { // (v * scale) + const 1249 cdisscaledindex(cdb, e11, &idxregs, keepmsk); 1250 reg = findreg(idxregs); 1251 pcs.Irm = modregrm(0, 0, 4); 1252 pcs.Isib = modregrm(ss, reg & 7, 5); 1253 if (reg & 8) 1254 pcs.Irex |= REX_X; 1255 } 1256 else 1257 { 1258 scodelem(cdb, e11, &idxregs, keepmsk, true); // load index reg 1259 setaddrmode(pcs, idxregs); 1260 } 1261 goto Lptr; 
1262 } 1263 1264 /* Look for *(v1 + v2) 1265 * EA = [v1][v2] 1266 */ 1267 1268 if (!I16 && e1isadd && (!e1.Ecount || !e1free) && 1269 (_tysize[e1ty] == REGSIZE || (I64 && _tysize[e1ty] == 4))) 1270 { 1271 L4: 1272 regm_t idxregs2; 1273 uint base, index; 1274 1275 // Look for *(v1 + v2 << scale) 1276 int ss = isscaledindex(e12); 1277 if (ss) 1278 { 1279 scodelem(cdb, e11, &idxregs, keepmsk, true); 1280 idxregs2 = allregs & ~(idxregs | keepmsk); 1281 cdisscaledindex(cdb, e12, &idxregs2, keepmsk | idxregs); 1282 } 1283 1284 // Look for *(v1 << scale + v2) 1285 else if ((ss = isscaledindex(e11)) != 0) 1286 { 1287 idxregs2 = idxregs; 1288 cdisscaledindex(cdb, e11, &idxregs2, keepmsk); 1289 idxregs = allregs & ~(idxregs2 | keepmsk); 1290 scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true); 1291 } 1292 // Look for *(((v1 << scale) + c1) + v2) 1293 else if (e11.Eoper == OPadd && !e11.Ecount && 1294 e11.EV.E2.Eoper == OPconst && 1295 (ss = isscaledindex(e11.EV.E1)) != 0 1296 ) 1297 { 1298 pcs.IEV1.Vuns = e11.EV.E2.EV.Vuns; 1299 idxregs2 = idxregs; 1300 cdisscaledindex(cdb, e11.EV.E1, &idxregs2, keepmsk); 1301 idxregs = allregs & ~(idxregs2 | keepmsk); 1302 scodelem(cdb, e12, &idxregs, keepmsk | idxregs2, true); 1303 freenode(e11.EV.E2); 1304 freenode(e11); 1305 } 1306 else 1307 { 1308 scodelem(cdb, e11, &idxregs, keepmsk, true); 1309 idxregs2 = allregs & ~(idxregs | keepmsk); 1310 scodelem(cdb, e12, &idxregs2, keepmsk | idxregs, true); 1311 } 1312 base = findreg(idxregs); 1313 index = findreg(idxregs2); 1314 pcs.Irm = modregrm(2, 0, 4); 1315 pcs.Isib = modregrm(ss, index & 7, base & 7); 1316 if (index & 8) 1317 pcs.Irex |= REX_X; 1318 if (base & 8) 1319 pcs.Irex |= REX_B; 1320 if (e1free) 1321 freenode(e1); 1322 1323 goto Lptr; 1324 } 1325 1326 /* give up and replace *e1 with 1327 * MOV idxreg,e 1328 * EA = 0[idxreg] 1329 * pinholeopt() will usually correct the 0, we need it in case 1330 * we have a pointer to a long and need an offset to the second 1331 * word. 
1332 */ 1333 1334 assert(e1free); 1335 scodelem(cdb, e1, &idxregs, keepmsk, true); // load index register 1336 setaddrmode(pcs, idxregs); 1337 Lptr: 1338 if (config.flags3 & CFG3ptrchk) 1339 cod3_ptrchk(cdb, pcs, keepmsk); // validate pointer code 1340 break; 1341 1342 case FLdatseg: 1343 assert(0); 1344 static if (0) 1345 { 1346 pcs.Irm = modregrm(0, 0, BPRM); 1347 pcs.IEVpointer1 = e.EVpointer; 1348 break; 1349 } 1350 1351 case FLfltreg: 1352 reflocal = true; 1353 pcs.Irm = modregrm(2, 0, BPRM); 1354 pcs.IEV1.Vint = 0; 1355 break; 1356 1357 case FLreg: 1358 goto L2; 1359 1360 case FLpara: 1361 if (s.Sclass == SCshadowreg) 1362 goto case FLfast; 1363 Lpara: 1364 refparam = true; 1365 pcs.Irm = modregrm(2, 0, BPRM); 1366 goto L2; 1367 1368 case FLauto: 1369 case FLfast: 1370 if (regParamInPreg(s)) 1371 { 1372 regm_t pregm = s.Spregm(); 1373 /* See if the parameter is still hanging about in a register, 1374 * and so can we load from that register instead. 1375 */ 1376 if (regcon.params & pregm /*&& s.Spreg2 == NOREG && !(pregm & XMMREGS)*/) 1377 { 1378 if (keepmsk & RMload && !anyiasm) 1379 { 1380 auto voffset = e.EV.Voffset; 1381 if (sz <= REGSIZE) 1382 { 1383 const reg_t preg = (voffset >= REGSIZE) ? s.Spreg2 : s.Spreg; 1384 if (voffset >= REGSIZE) 1385 voffset -= REGSIZE; 1386 1387 /* preg could be NOREG if it's a variadic function and we're 1388 * in Win64 shadow regs and we're offsetting to get to the start 1389 * of the variadic args. 
1390 */ 1391 if (preg != NOREG && regcon.params & mask(preg)) 1392 { 1393 //printf("sz %d, preg %s, Voffset %d\n", cast(int)sz, regm_str(mask(preg)), cast(int)voffset); 1394 if (mask(preg) & XMMREGS && sz != REGSIZE) 1395 { 1396 /* The following fails with this from std.math on Linux64: 1397 void main() 1398 { 1399 alias T = float; 1400 T x = T.infinity; 1401 T e = T.infinity; 1402 int eptr; 1403 T v = frexp(x, eptr); 1404 assert(isIdentical(e, v)); 1405 } 1406 */ 1407 } 1408 else if (voffset == 0) 1409 { 1410 pcs.Irm = modregrm(3, 0, preg & 7); 1411 if (preg & 8) 1412 pcs.Irex |= REX_B; 1413 if (I64 && sz == 1 && preg >= 4) 1414 pcs.Irex |= REX; 1415 regcon.used |= mask(preg); 1416 break; 1417 } 1418 else if (voffset == 1 && sz == 1 && preg < 4) 1419 { 1420 pcs.Irm = modregrm(3, 0, 4 | preg); // use H register 1421 regcon.used |= mask(preg); 1422 break; 1423 } 1424 } 1425 } 1426 } 1427 else 1428 regcon.params &= ~pregm; 1429 } 1430 } 1431 if (s.Sclass == SCshadowreg) 1432 goto Lpara; 1433 goto case FLbprel; 1434 1435 case FLbprel: 1436 reflocal = true; 1437 pcs.Irm = modregrm(2, 0, BPRM); 1438 goto L2; 1439 1440 case FLextern: 1441 if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0) 1442 { 1443 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 1444 { 1445 assert(0); 1446 } 1447 else static if (TARGET_WINDOS) 1448 { 1449 if (I64) 1450 { // GS:[88] 1451 pcs.Irm = modregrm(0, 0, 4); 1452 pcs.Isib = modregrm(0, 4, 5); // don't use [RIP] addressing 1453 pcs.IFL1 = FLconst; 1454 pcs.IEV1.Vuns = 88; 1455 pcs.Iflags = CFgs; 1456 pcs.Irex |= REX_W; 1457 break; 1458 } 1459 else 1460 { 1461 pcs.Iflags |= CFfs; // add FS: override 1462 } 1463 } 1464 } 1465 if (s.ty() & mTYcs && cast(bool) LARGECODE) 1466 goto Lfardata; 1467 goto L3; 1468 1469 case FLdata: 1470 case FLudata: 1471 case FLcsdata: 1472 case FLgot: 1473 case FLgotoff: 1474 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || 
TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 1475 { 1476 case FLtlsdata: 1477 } 1478 L3: 1479 pcs.Irm = modregrm(0, 0, BPRM); 1480 L2: 1481 if (fl == FLreg) 1482 { 1483 //printf("test: FLreg, %s %d regcon.mvar = %s\n", 1484 // s.Sident.ptr, cast(int)e.EV.Voffset, regm_str(regcon.mvar)); 1485 if (!(s.Sregm & regcon.mvar)) 1486 symbol_print(s); 1487 assert(s.Sregm & regcon.mvar); 1488 1489 /* Attempting to paint a float as an integer or an integer as a float 1490 * will cause serious problems since the EA is loaded separatedly from 1491 * the opcode. The only way to deal with this is to prevent enregistering 1492 * such variables. 1493 */ 1494 if (tyxmmreg(ty) && !(s.Sregm & XMMREGS) || 1495 !tyxmmreg(ty) && (s.Sregm & XMMREGS)) 1496 cgreg_unregister(s.Sregm); 1497 1498 if ( 1499 s.Sclass == SCregpar || 1500 s.Sclass == SCparameter) 1501 { refparam = true; 1502 reflocal = true; // kludge to set up prolog 1503 } 1504 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7); 1505 if (s.Sreglsw & 8) 1506 pcs.Irex |= REX_B; 1507 if (e.EV.Voffset == REGSIZE && sz == REGSIZE) 1508 { 1509 pcs.Irm = modregrm(3, 0, s.Sregmsw & 7); 1510 if (s.Sregmsw & 8) 1511 pcs.Irex |= REX_B; 1512 else 1513 pcs.Irex &= ~REX_B; 1514 } 1515 else if (e.EV.Voffset == 1 && sz == 1) 1516 { 1517 assert(s.Sregm & BYTEREGS); 1518 assert(s.Sreglsw < 4); 1519 pcs.Irm |= 4; // use 2nd byte of register 1520 } 1521 else 1522 { 1523 assert(!e.EV.Voffset); 1524 if (I64 && sz == 1 && s.Sreglsw >= 4) 1525 pcs.Irex |= REX; 1526 } 1527 } 1528 else if (s.ty() & mTYcs && !(fl == FLextern && LARGECODE)) 1529 { 1530 pcs.Iflags |= CFcs | CFoff; 1531 } 1532 if (config.flags3 & CFG3pic && 1533 (fl == FLtlsdata || s.ty() & mTYthread)) 1534 { 1535 if (I32) 1536 { 1537 if (config.flags3 & CFG3pie) 1538 { 1539 pcs.Iflags |= CFgs; 1540 } 1541 } 1542 else if (I64) 1543 { 1544 if (config.flags3 & CFG3pie && 1545 (s.Sclass == SCglobal || s.Sclass == SCstatic || s.Sclass == SClocstat)) 1546 { 1547 pcs.Iflags |= CFfs; 1548 pcs.Irm 
= modregrm(0, 0, 4); 1549 pcs.Isib = modregrm(0, 4, 5); // don't use [RIP] addressing 1550 } 1551 else 1552 { 1553 pcs.Iflags |= CFopsize; 1554 pcs.Irex = 0x48; 1555 } 1556 } 1557 } 1558 pcs.IEV1.Vsym = s; 1559 pcs.IEV1.Voffset = e.EV.Voffset; 1560 if (sz == 1) 1561 { /* Don't use SI or DI for this variable */ 1562 s.Sflags |= GTbyte; 1563 if (I64 ? e.EV.Voffset > 0 : e.EV.Voffset > 1) 1564 { 1565 debug if (debugr) printf("'%s' not reg cand due to byte offset\n", s.Sident.ptr); 1566 s.Sflags &= ~GTregcand; 1567 } 1568 } 1569 else if (e.EV.Voffset || sz > tysize(s.Stype.Tty)) 1570 { 1571 debug if (debugr) printf("'%s' not reg cand due to offset or size\n", s.Sident.ptr); 1572 s.Sflags &= ~GTregcand; 1573 } 1574 1575 if (config.fpxmmregs && tyfloating(s.ty()) && !tyfloating(ty)) 1576 { 1577 debug if (debugr) printf("'%s' not reg cand due to mix float and int\n", s.Sident.ptr); 1578 // Can't successfully mix XMM register variables accessed as integers 1579 s.Sflags &= ~GTregcand; 1580 } 1581 1582 if (!(keepmsk & RMstore)) // if not store only 1583 s.Sflags |= SFLread; // assume we are doing a read 1584 break; 1585 1586 case FLpseudo: 1587 version (MARS) 1588 { 1589 { 1590 getregs(cdb, mask(s.Sreglsw)); 1591 pcs.Irm = modregrm(3, 0, s.Sreglsw & 7); 1592 if (s.Sreglsw & 8) 1593 pcs.Irex |= REX_B; 1594 if (e.EV.Voffset == 1 && sz == 1) 1595 { assert(s.Sregm & BYTEREGS); 1596 assert(s.Sreglsw < 4); 1597 pcs.Irm |= 4; // use 2nd byte of register 1598 } 1599 else 1600 { assert(!e.EV.Voffset); 1601 if (I64 && sz == 1 && s.Sreglsw >= 4) 1602 pcs.Irex |= REX; 1603 } 1604 break; 1605 } 1606 } 1607 else 1608 { 1609 { 1610 uint u = s.Sreglsw; 1611 getregs(cdb, pseudomask[u]); 1612 pcs.Irm = modregrm(3, 0, pseudoreg[u] & 7); 1613 break; 1614 } 1615 } 1616 1617 case FLfardata: 1618 case FLfunc: /* reading from code seg */ 1619 if (config.exe & EX_flat) 1620 goto L3; 1621 Lfardata: 1622 { 1623 regm_t regm = ALLREGS & ~keepmsk; // need scratch register 1624 allocreg(cdb, ®m, ®, 
TYint);
                getregs(cdb,mES);
                // MOV mreg,seg of symbol
                cdb.gencs(0xB8 + reg, 0, FLextern, s);
                cdb.last().Iflags = CFseg;
                cdb.gen2(0x8E, modregrmx(3, 0, reg));     // MOV ES,reg
                pcs.Iflags |= CFes | CFoff;            /* ES segment override */
                goto L3;
            }

        case FLstack:
            assert(!I16);
            pcs.Irm = modregrm(2, 0, 4);
            pcs.Isib = modregrm(0, 4, SP);
            pcs.IEV1.Vsym = s;
            pcs.IEV1.Voffset = e.EV.Voffset;
            break;

        default:
            WRFL(cast(FL)fl);
            symbol_print(s);
            assert(0);
    }
}

/*****************************
 * Emit the instruction described by *pcs once for every general register
 * that carries a piece of a float or double, patching the reg field of the
 * mod/rm byte and the EA offset for each piece.
 *
 * Register / offset pairing, in the order the instructions are generated:
 *      32 bit, float:   AX <- +0
 *      32 bit, double:  DX <- +REGSIZE, AX <- +0
 *      16 bit, float:   DX <- +2, AX <- +0
 *      16 bit, double:  AX <- +6, BX <- +4, CX <- +2, DX <- +0
 *
 * Not usable in 64 bit mode (asserted).
 * Params:
 *      cdb = sink for the generated instructions
 *      pcs = opcode and EA to replicate; its Irm reg field is overwritten,
 *            its IEV1.Voffset is back at the entry value on return
 *      tym = either TYdouble or TYfloat (only the basic type is looked at)
 */

void fltregs(ref CodeBuilder cdb, code* pcs, tym_t tym)
{
    assert(!I64);
    const bool single = tybasic(tym) == TYfloat;

    if (I32)
    {
        if (single)
            getregs(cdb, mAX);
        else
        {
            getregs(cdb, mAX | mDX);

            // second register-sized piece of the double goes through DX
            pcs.IEV1.Voffset += REGSIZE;
            NEWREG(pcs.Irm, DX);
            cdb.gen(pcs);
            pcs.IEV1.Voffset -= REGSIZE;
        }

        // first (or only) piece goes through AX
        NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);
        return;
    }

    // 16 bit code: walk the pieces from the highest offset down to offset 0
    if (single)
    {
        getregs(cdb, FLOATREGS_16);

        pcs.IEV1.Voffset += 2;
        NEWREG(pcs.Irm, DX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;
        NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);
    }
    else
    {
        getregs(cdb, DOUBLEREGS_16);

        pcs.IEV1.Voffset += 6;
        NEWREG(pcs.Irm, AX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;
        NEWREG(pcs.Irm, BX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;
        NEWREG(pcs.Irm, CX);
        cdb.gen(pcs);

        pcs.IEV1.Voffset -= 2;          // Voffset is now back at its entry value
        NEWREG(pcs.Irm, DX);
        cdb.gen(pcs);
    }
}


/*****************************
 * Given a result in registers, test it for true or false.
 * Will fail if TYfptr and the reg is ES!
 * If saveflag is true, preserve the contents of the
 * registers.
1706 */ 1707 1708 void tstresult(ref CodeBuilder cdb, regm_t regm, tym_t tym, uint saveflag) 1709 { 1710 reg_t scrreg; // scratch register 1711 regm_t scrregm; 1712 1713 //if (!(regm & (mBP | ALLREGS))) 1714 //printf("tstresult(regm = %s, tym = x%x, saveflag = %d)\n", 1715 //regm_str(regm),tym,saveflag); 1716 1717 assert(regm & (XMMREGS | mBP | ALLREGS)); 1718 tym = tybasic(tym); 1719 reg_t reg = findreg(regm); 1720 uint sz = _tysize[tym]; 1721 if (sz == 1) 1722 { 1723 assert(regm & BYTEREGS); 1724 genregs(cdb, 0x84, reg, reg); // TEST regL,regL 1725 if (I64 && reg >= 4) 1726 code_orrex(cdb.last(), REX); 1727 return; 1728 } 1729 if (regm & XMMREGS) 1730 { 1731 reg_t xreg; 1732 regm_t xregs = XMMREGS & ~regm; 1733 allocreg(cdb,&xregs, &xreg, TYdouble); 1734 opcode_t op = 0; 1735 if (tym == TYdouble || tym == TYidouble || tym == TYcdouble) 1736 op = 0x660000; 1737 cdb.gen2(op | 0x0F57, modregrm(3, xreg-XMM0, xreg-XMM0)); // XORPS xreg,xreg 1738 cdb.gen2(op | 0x0F2E, modregrm(3, xreg-XMM0, reg-XMM0)); // UCOMISS xreg,reg 1739 if (tym == TYcfloat || tym == TYcdouble) 1740 { code *cnop = gennop(null); 1741 genjmp(cdb, JNE, FLcode, cast(block *) cnop); // JNE L1 1742 genjmp(cdb, JP, FLcode, cast(block *) cnop); // JP L1 1743 reg = findreg(regm & ~mask(reg)); 1744 cdb.gen2(op | 0x0F2E, modregrm(3, xreg-XMM0, reg-XMM0)); // UCOMISS xreg,reg 1745 cdb.append(cnop); 1746 } 1747 return; 1748 } 1749 if (sz <= REGSIZE) 1750 { 1751 if (!I16) 1752 { 1753 if (tym == TYfloat) 1754 { 1755 if (saveflag) 1756 { 1757 scrregm = allregs & ~regm; // possible scratch regs 1758 allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg 1759 genmovreg(cdb, scrreg, reg); // MOV scrreg,msreg 1760 reg = scrreg; 1761 } 1762 getregs(cdb, mask(reg)); 1763 cdb.gen2(0xD1, modregrmx(3, 4, reg)); // SHL reg,1 1764 return; 1765 } 1766 gentstreg(cdb,reg); // TEST reg,reg 1767 if (sz == SHORTSIZE) 1768 cdb.last().Iflags |= CFopsize; // 16 bit operands 1769 else if (sz == 8) 1770 
code_orrex(cdb.last(), REX_W); 1771 } 1772 else 1773 gentstreg(cdb, reg); // TEST reg,reg 1774 return; 1775 } 1776 1777 if (saveflag || tyfv(tym)) 1778 { 1779 L1: 1780 scrregm = ALLREGS & ~regm; // possible scratch regs 1781 allocreg(cdb, &scrregm, &scrreg, TYoffset); // allocate scratch reg 1782 if (I32 || sz == REGSIZE * 2) 1783 { 1784 assert(regm & mMSW && regm & mLSW); 1785 1786 reg = findregmsw(regm); 1787 if (I32) 1788 { 1789 if (tyfv(tym)) 1790 genregs(cdb, 0x0FB7, scrreg, reg); // MOVZX scrreg,msreg 1791 else 1792 { 1793 genmovreg(cdb, scrreg, reg); // MOV scrreg,msreg 1794 if (tym == TYdouble || tym == TYdouble_alias) 1795 cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1 1796 } 1797 } 1798 else 1799 { 1800 genmovreg(cdb, scrreg, reg); // MOV scrreg,msreg 1801 if (tym == TYfloat) 1802 cdb.gen2(0xD1, modregrm(3, 4, scrreg)); // SHL scrreg,1 1803 } 1804 reg = findreglsw(regm); 1805 genorreg(cdb, scrreg, reg); // OR scrreg,lsreg 1806 } 1807 else if (sz == 8) 1808 { 1809 // !I32 1810 genmovreg(cdb, scrreg, AX); // MOV scrreg,AX 1811 if (tym == TYdouble || tym == TYdouble_alias) 1812 cdb.gen2(0xD1 ,modregrm(3, 4, scrreg)); // SHL scrreg,1 1813 genorreg(cdb, scrreg, BX); // OR scrreg,BX 1814 genorreg(cdb, scrreg, CX); // OR scrreg,CX 1815 genorreg(cdb, scrreg, DX); // OR scrreg,DX 1816 } 1817 else 1818 assert(0); 1819 } 1820 else 1821 { 1822 if (I32 || sz == REGSIZE * 2) 1823 { 1824 // can't test ES:LSW for 0 1825 assert(regm & mMSW & ALLREGS && regm & (mLSW | mBP)); 1826 1827 reg = findregmsw(regm); 1828 if (regcon.mvar & mask(reg)) // if register variable 1829 goto L1; // don't trash it 1830 getregs(cdb, mask(reg)); // we're going to trash reg 1831 if (tyfloating(tym) && sz == 2 * _tysize[TYint]) 1832 cdb.gen2(0xD1, modregrm(3 ,4, reg)); // SHL reg,1 1833 genorreg(cdb, reg, findreglsw(regm)); // OR reg,reg+1 1834 if (I64) 1835 code_orrex(cdb.last(), REX_W); 1836 } 1837 else if (sz == 8) 1838 { assert(regm == DOUBLEREGS_16); 1839 getregs(cdb,mAX); // 
allocate AX 1840 if (tym == TYdouble || tym == TYdouble_alias) 1841 cdb.gen2(0xD1, modregrm(3, 4, AX)); // SHL AX,1 1842 genorreg(cdb, AX, BX); // OR AX,BX 1843 genorreg(cdb, AX, CX); // OR AX,CX 1844 genorreg(cdb, AX, DX); // OR AX,DX 1845 } 1846 else 1847 assert(0); 1848 } 1849 code_orflag(cdb.last(),CFpsw); 1850 } 1851 1852 /****************************** 1853 * Given the result of an expression is in retregs, 1854 * generate necessary code to return result in *pretregs. 1855 */ 1856 1857 void fixresult(ref CodeBuilder cdb, elem *e, regm_t retregs, regm_t *pretregs) 1858 { 1859 //printf("fixresult(e = %p, retregs = %s, *pretregs = %s)\n",e,regm_str(retregs),regm_str(*pretregs)); 1860 if (*pretregs == 0) return; // if don't want result 1861 assert(e && retregs); // need something to work with 1862 regm_t forccs = *pretregs & mPSW; 1863 regm_t forregs = *pretregs & (mST01 | mST0 | mBP | ALLREGS | mES | mSTACK | XMMREGS); 1864 tym_t tym = tybasic(e.Ety); 1865 1866 if (tym == TYstruct) 1867 { 1868 if (e.Eoper == OPpair || e.Eoper == OPrpair) 1869 { 1870 if (I64) 1871 tym = TYucent; 1872 else 1873 tym = TYullong; 1874 } 1875 else 1876 // Hack to support cdstreq() 1877 tym = (forregs & mMSW) ? 
TYfptr : TYnptr; 1878 } 1879 int sz = _tysize[tym]; 1880 1881 if (sz == 1) 1882 { 1883 assert(retregs & BYTEREGS); 1884 const reg = findreg(retregs); 1885 if (e.Eoper == OPvar && 1886 e.EV.Voffset == 1 && 1887 e.EV.Vsym.Sfl == FLreg) 1888 { 1889 assert(reg < 4); 1890 if (forccs) 1891 cdb.gen2(0x84, modregrm(3, reg | 4, reg | 4)); // TEST regH,regH 1892 forccs = 0; 1893 } 1894 } 1895 1896 reg_t reg,rreg; 1897 if ((retregs & forregs) == retregs) // if already in right registers 1898 *pretregs = retregs; 1899 else if (forregs) // if return the result in registers 1900 { 1901 if (forregs & (mST01 | mST0)) 1902 { 1903 fixresult87(cdb, e, retregs, pretregs); 1904 return; 1905 } 1906 uint opsflag = false; 1907 if (I16 && sz == 8) 1908 { 1909 if (forregs & mSTACK) 1910 { 1911 assert(retregs == DOUBLEREGS_16); 1912 // Push floating regs 1913 cdb.gen1(0x50 + AX); 1914 cdb.gen1(0x50 + BX); 1915 cdb.gen1(0x50 + CX); 1916 cdb.gen1(0x50 + DX); 1917 stackpush += DOUBLESIZE; 1918 } 1919 else if (retregs & mSTACK) 1920 { 1921 assert(forregs == DOUBLEREGS_16); 1922 // Pop floating regs 1923 getregs(cdb,forregs); 1924 cdb.gen1(0x58 + DX); 1925 cdb.gen1(0x58 + CX); 1926 cdb.gen1(0x58 + BX); 1927 cdb.gen1(0x58 + AX); 1928 stackpush -= DOUBLESIZE; 1929 retregs = DOUBLEREGS_16; // for tstresult() below 1930 } 1931 else 1932 { 1933 debug 1934 printf("retregs = %s, forregs = %s\n", regm_str(retregs), regm_str(forregs)), 1935 assert(0); 1936 } 1937 if (!OTleaf(e.Eoper)) 1938 opsflag = true; 1939 } 1940 else 1941 { 1942 allocreg(cdb, pretregs, &rreg, tym); // allocate return regs 1943 if (retregs & XMMREGS) 1944 { 1945 reg = findreg(retregs & XMMREGS); 1946 // MOVSD floatreg, XMM? 
1947 cdb.genxmmreg(xmmstore(tym), reg, 0, tym); 1948 if (mask(rreg) & XMMREGS) 1949 // MOVSD XMM?, floatreg 1950 cdb.genxmmreg(xmmload(tym), rreg, 0, tym); 1951 else 1952 { 1953 // MOV rreg,floatreg 1954 cdb.genfltreg(0x8B,rreg,0); 1955 if (sz == 8) 1956 { 1957 if (I32) 1958 { 1959 rreg = findregmsw(*pretregs); 1960 cdb.genfltreg(0x8B, rreg,4); 1961 } 1962 else 1963 code_orrex(cdb.last(),REX_W); 1964 } 1965 } 1966 } 1967 else if (forregs & XMMREGS) 1968 { 1969 reg = findreg(retregs & (mBP | ALLREGS)); 1970 switch (sz) 1971 { 1972 case 4: 1973 cdb.gen2(LODD, modregxrmx(3, rreg - XMM0, reg)); // MOVD xmm,reg 1974 break; 1975 1976 case 8: 1977 if (I32) 1978 { 1979 cdb.genfltreg(0x89, reg, 0); 1980 reg = findregmsw(retregs); 1981 cdb.genfltreg(0x89, reg, 4); 1982 cdb.genxmmreg(xmmload(tym), rreg, 0, tym); // MOVQ xmm,mem 1983 } 1984 else 1985 { 1986 cdb.gen2(LODD /* [sic!] */, modregxrmx(3, rreg - XMM0, reg)); 1987 code_orrex(cdb.last(), REX_W); // MOVQ xmm,reg 1988 } 1989 break; 1990 1991 default: 1992 assert(false); 1993 } 1994 checkSetVex(cdb.last(), tym); 1995 } 1996 else if (sz > REGSIZE) 1997 { 1998 uint msreg = findregmsw(retregs); 1999 uint lsreg = findreglsw(retregs); 2000 uint msrreg = findregmsw(*pretregs); 2001 uint lsrreg = findreglsw(*pretregs); 2002 2003 genmovreg(cdb, msrreg, msreg); // MOV msrreg,msreg 2004 genmovreg(cdb, lsrreg, lsreg); // MOV lsrreg,lsreg 2005 } 2006 else 2007 { 2008 assert(!(retregs & XMMREGS)); 2009 assert(!(forregs & XMMREGS)); 2010 reg = findreg(retregs & (mBP | ALLREGS)); 2011 if (I64 && sz <= 4) 2012 genregs(cdb, 0x89, reg, rreg); // only move 32 bits, and zero the top 32 bits 2013 else 2014 genmovreg(cdb, rreg, reg); // MOV rreg,reg 2015 } 2016 } 2017 cssave(e,retregs | *pretregs,opsflag); 2018 // Commented out due to Bugzilla 8840 2019 //forregs = 0; // don't care about result in reg cuz real result is in rreg 2020 retregs = *pretregs & ~mPSW; 2021 } 2022 if (forccs) // if return result in flags 2023 { 2024 if (retregs & 
(mST01 | mST0)) 2025 fixresult87(cdb, e, retregs, pretregs); 2026 else 2027 tstresult(cdb, retregs, tym, forregs); 2028 } 2029 } 2030 2031 /******************************* 2032 * Extra information about each CLIB runtime library function. 2033 */ 2034 2035 enum 2036 { 2037 INF32 = 1, /// if 32 bit only 2038 INFfloat = 2, /// if this is floating point 2039 INFwkdone = 4, /// if weak extern is already done 2040 INF64 = 8, /// if 64 bit only 2041 INFpushebx = 0x10, /// push EBX before load_localgot() 2042 INFpusheabcdx = 0x20, /// pass EAX/EBX/ECX/EDX on stack, callee does ret 16 2043 } 2044 2045 struct ClibInfo 2046 { 2047 regm_t retregs16; /* registers that 16 bit result is returned in */ 2048 regm_t retregs32; /* registers that 32 bit result is returned in */ 2049 ubyte pop; // # of bytes popped off of stack upon return 2050 ubyte flags; /// INFxxx 2051 byte push87; // # of pushes onto the 8087 stack 2052 byte pop87; // # of pops off of the 8087 stack 2053 } 2054 2055 __gshared int clib_inited = false; // true if initialized 2056 2057 Symbol* symboly(const(char)* name, regm_t desregs) 2058 { 2059 Symbol *s = symbol_calloc(name); 2060 s.Stype = tsclib; 2061 s.Sclass = SCextern; 2062 s.Sfl = FLfunc; 2063 s.Ssymnum = 0; 2064 s.Sregsaved = ~desregs & (mBP | mES | ALLREGS); 2065 return s; 2066 } 2067 2068 void getClibInfo(uint clib, Symbol** ps, ClibInfo** pinfo) 2069 { 2070 __gshared Symbol*[CLIB.MAX] clibsyms; 2071 __gshared ClibInfo[CLIB.MAX] clibinfo; 2072 2073 if (!clib_inited) 2074 { 2075 for (size_t i = 0; i < CLIB.MAX; ++i) 2076 { 2077 Symbol* s = clibsyms[i]; 2078 if (s) 2079 { 2080 s.Sxtrnnum = 0; 2081 s.Stypidx = 0; 2082 clibinfo[i].flags &= ~INFwkdone; 2083 } 2084 } 2085 clib_inited = true; 2086 } 2087 2088 const uint ex_unix = (EX_LINUX | EX_LINUX64 | 2089 EX_OSX | EX_OSX64 | 2090 EX_FREEBSD | EX_FREEBSD64 | 2091 EX_OPENBSD | EX_OPENBSD64 | 2092 EX_DRAGONFLYBSD64 | 2093 EX_SOLARIS | EX_SOLARIS64); 2094 2095 ClibInfo* cinfo = &clibinfo[clib]; 2096 Symbol* s 
= clibsyms[clib]; 2097 if (!s) 2098 { 2099 2100 switch (clib) 2101 { 2102 case CLIB.lcmp: 2103 { 2104 const(char)* name = (config.exe & ex_unix) ? "__LCMP__" : "_LCMP@"; 2105 s = symboly(name, 0); 2106 } 2107 break; 2108 2109 case CLIB.lmul: 2110 { 2111 const(char)* name = (config.exe & ex_unix) ? "__LMUL__" : "_LMUL@"; 2112 s = symboly(name, mAX|mCX|mDX); 2113 cinfo.retregs16 = mDX|mAX; 2114 cinfo.retregs32 = mDX|mAX; 2115 } 2116 break; 2117 2118 case CLIB.ldiv: 2119 cinfo.retregs16 = mDX|mAX; 2120 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2121 { 2122 s = symboly("__divdi3", mAX|mBX|mCX|mDX); 2123 cinfo.flags = INFpushebx; 2124 cinfo.retregs32 = mDX|mAX; 2125 } 2126 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2127 { 2128 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2129 cinfo.flags = INFpushebx; 2130 cinfo.retregs32 = mDX|mAX; 2131 } 2132 else if (I32 && config.objfmt == OBJ_MSCOFF) 2133 { 2134 s = symboly("_alldiv", mAX|mBX|mCX|mDX); 2135 cinfo.flags = INFpusheabcdx; 2136 cinfo.retregs32 = mDX|mAX; 2137 } 2138 else 2139 { 2140 const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@"; 2141 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2142 cinfo.retregs32 = mDX|mAX; 2143 } 2144 break; 2145 2146 case CLIB.lmod: 2147 cinfo.retregs16 = mCX|mBX; 2148 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2149 { 2150 s = symboly("__moddi3", mAX|mBX|mCX|mDX); 2151 cinfo.flags = INFpushebx; 2152 cinfo.retregs32 = mDX|mAX; 2153 } 2154 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2155 { 2156 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2157 cinfo.flags = INFpushebx; 2158 cinfo.retregs32 = mCX|mBX; 2159 } 2160 else if (I32 && config.objfmt == OBJ_MSCOFF) 2161 { 2162 s = symboly("_allrem", mAX|mBX|mCX|mDX); 2163 cinfo.flags = INFpusheabcdx; 2164 cinfo.retregs32 = mAX|mDX; 2165 } 2166 else 2167 { 2168 const(char)* name = (config.exe & ex_unix) ? "__LDIV__" : "_LDIV@"; 2169 s = symboly(name, (config.exe & ex_unix) ? 
mAX|mBX|mCX|mDX : ALLREGS); 2170 cinfo.retregs32 = mCX|mBX; 2171 } 2172 break; 2173 2174 case CLIB.uldiv: 2175 cinfo.retregs16 = mDX|mAX; 2176 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2177 { 2178 s = symboly("__udivdi3", mAX|mBX|mCX|mDX); 2179 cinfo.flags = INFpushebx; 2180 cinfo.retregs32 = mDX|mAX; 2181 } 2182 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2183 { 2184 s = symboly("__ULDIV2__", mAX|mBX|mCX|mDX); 2185 cinfo.flags = INFpushebx; 2186 cinfo.retregs32 = mDX|mAX; 2187 } 2188 else if (I32 && config.objfmt == OBJ_MSCOFF) 2189 { 2190 s = symboly("_aulldiv", mAX|mBX|mCX|mDX); 2191 cinfo.flags = INFpusheabcdx; 2192 cinfo.retregs32 = mDX|mAX; 2193 } 2194 else 2195 { 2196 const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@"; 2197 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2198 cinfo.retregs32 = mDX|mAX; 2199 } 2200 break; 2201 2202 case CLIB.ulmod: 2203 cinfo.retregs16 = mCX|mBX; 2204 if (config.exe & (EX_LINUX | EX_FREEBSD)) 2205 { 2206 s = symboly("__umoddi3", mAX|mBX|mCX|mDX); 2207 cinfo.flags = INFpushebx; 2208 cinfo.retregs32 = mDX|mAX; 2209 } 2210 else if (config.exe & (EX_OPENBSD | EX_SOLARIS)) 2211 { 2212 s = symboly("__LDIV2__", mAX|mBX|mCX|mDX); 2213 cinfo.flags = INFpushebx; 2214 cinfo.retregs32 = mCX|mBX; 2215 } 2216 else if (I32 && config.objfmt == OBJ_MSCOFF) 2217 { 2218 s = symboly("_aullrem", mAX|mBX|mCX|mDX); 2219 cinfo.flags = INFpusheabcdx; 2220 cinfo.retregs32 = mAX|mDX; 2221 } 2222 else 2223 { 2224 const(char)* name = (config.exe & ex_unix) ? "__ULDIV__" : "_ULDIV@"; 2225 s = symboly(name, (config.exe & ex_unix) ? mAX|mBX|mCX|mDX : ALLREGS); 2226 cinfo.retregs32 = mCX|mBX; 2227 } 2228 break; 2229 2230 // This section is only for Windows and DOS (i.e. 
machines without the x87 FPU) 2231 case CLIB.dmul: 2232 s = symboly("_DMUL@",mAX|mBX|mCX|mDX); 2233 cinfo.retregs16 = DOUBLEREGS_16; 2234 cinfo.retregs32 = DOUBLEREGS_32; 2235 cinfo.pop = 8; 2236 cinfo.flags = INFfloat; 2237 cinfo.push87 = 1; 2238 cinfo.pop87 = 1; 2239 break; 2240 2241 case CLIB.ddiv: 2242 s = symboly("_DDIV@",mAX|mBX|mCX|mDX); 2243 cinfo.retregs16 = DOUBLEREGS_16; 2244 cinfo.retregs32 = DOUBLEREGS_32; 2245 cinfo.pop = 8; 2246 cinfo.flags = INFfloat; 2247 cinfo.push87 = 1; 2248 cinfo.pop87 = 1; 2249 break; 2250 2251 case CLIB.dtst0: 2252 s = symboly("_DTST0@",0); 2253 cinfo.flags = INFfloat; 2254 break; 2255 2256 case CLIB.dtst0exc: 2257 s = symboly("_DTST0EXC@",0); 2258 cinfo.flags = INFfloat; 2259 break; 2260 2261 case CLIB.dcmp: 2262 s = symboly("_DCMP@",0); 2263 cinfo.pop = 8; 2264 cinfo.flags = INFfloat; 2265 cinfo.push87 = 1; 2266 cinfo.pop87 = 1; 2267 break; 2268 2269 case CLIB.dcmpexc: 2270 s = symboly("_DCMPEXC@",0); 2271 cinfo.pop = 8; 2272 cinfo.flags = INFfloat; 2273 cinfo.push87 = 1; 2274 cinfo.pop87 = 1; 2275 break; 2276 2277 case CLIB.dneg: 2278 s = symboly("_DNEG@",I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2279 cinfo.retregs16 = DOUBLEREGS_16; 2280 cinfo.retregs32 = DOUBLEREGS_32; 2281 cinfo.flags = INFfloat; 2282 break; 2283 2284 case CLIB.dadd: 2285 s = symboly("_DADD@",mAX|mBX|mCX|mDX); 2286 cinfo.retregs16 = DOUBLEREGS_16; 2287 cinfo.retregs32 = DOUBLEREGS_32; 2288 cinfo.pop = 8; 2289 cinfo.flags = INFfloat; 2290 cinfo.push87 = 1; 2291 cinfo.pop87 = 1; 2292 break; 2293 2294 case CLIB.dsub: 2295 s = symboly("_DSUB@",mAX|mBX|mCX|mDX); 2296 cinfo.retregs16 = DOUBLEREGS_16; 2297 cinfo.retregs32 = DOUBLEREGS_32; 2298 cinfo.pop = 8; 2299 cinfo.flags = INFfloat; 2300 cinfo.push87 = 1; 2301 cinfo.pop87 = 1; 2302 break; 2303 2304 case CLIB.fmul: 2305 s = symboly("_FMUL@",mAX|mBX|mCX|mDX); 2306 cinfo.retregs16 = FLOATREGS_16; 2307 cinfo.retregs32 = FLOATREGS_32; 2308 cinfo.flags = INFfloat; 2309 cinfo.push87 = 1; 2310 cinfo.pop87 = 1; 2311 break; 2312 2313 case CLIB.fdiv: 2314 s = symboly("_FDIV@",mAX|mBX|mCX|mDX); 2315 cinfo.retregs16 = FLOATREGS_16; 2316 cinfo.retregs32 = FLOATREGS_32; 2317 cinfo.flags = INFfloat; 2318 cinfo.push87 = 1; 2319 cinfo.pop87 = 1; 2320 break; 2321 2322 case CLIB.ftst0: 2323 s = symboly("_FTST0@",0); 2324 cinfo.flags = INFfloat; 2325 break; 2326 2327 case CLIB.ftst0exc: 2328 s = symboly("_FTST0EXC@",0); 2329 cinfo.flags = INFfloat; 2330 break; 2331 2332 case CLIB.fcmp: 2333 s = symboly("_FCMP@",0); 2334 cinfo.flags = INFfloat; 2335 cinfo.push87 = 1; 2336 cinfo.pop87 = 1; 2337 break; 2338 2339 case CLIB.fcmpexc: 2340 s = symboly("_FCMPEXC@",0); 2341 cinfo.flags = INFfloat; 2342 cinfo.push87 = 1; 2343 cinfo.pop87 = 1; 2344 break; 2345 2346 case CLIB.fneg: 2347 s = symboly("_FNEG@",I16 ? 
FLOATREGS_16 : FLOATREGS_32); 2348 cinfo.retregs16 = FLOATREGS_16; 2349 cinfo.retregs32 = FLOATREGS_32; 2350 cinfo.flags = INFfloat; 2351 break; 2352 2353 case CLIB.fadd: 2354 s = symboly("_FADD@",mAX|mBX|mCX|mDX); 2355 cinfo.retregs16 = FLOATREGS_16; 2356 cinfo.retregs32 = FLOATREGS_32; 2357 cinfo.flags = INFfloat; 2358 cinfo.push87 = 1; 2359 cinfo.pop87 = 1; 2360 break; 2361 2362 case CLIB.fsub: 2363 s = symboly("_FSUB@",mAX|mBX|mCX|mDX); 2364 cinfo.retregs16 = FLOATREGS_16; 2365 cinfo.retregs32 = FLOATREGS_32; 2366 cinfo.flags = INFfloat; 2367 cinfo.push87 = 1; 2368 cinfo.pop87 = 1; 2369 break; 2370 2371 case CLIB.dbllng: 2372 { 2373 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG" : "_DBLLNG@"; 2374 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2375 cinfo.retregs16 = mDX | mAX; 2376 cinfo.retregs32 = mAX; 2377 cinfo.flags = INFfloat; 2378 cinfo.push87 = 1; 2379 cinfo.pop87 = 1; 2380 break; 2381 } 2382 2383 case CLIB.lngdbl: 2384 { 2385 const(char)* name = (config.exe & ex_unix) ? "__LNGDBL" : "_LNGDBL@"; 2386 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2387 cinfo.retregs16 = DOUBLEREGS_16; 2388 cinfo.retregs32 = DOUBLEREGS_32; 2389 cinfo.flags = INFfloat; 2390 cinfo.push87 = 1; 2391 cinfo.pop87 = 1; 2392 break; 2393 } 2394 2395 case CLIB.dblint: 2396 { 2397 const(char)* name = (config.exe & ex_unix) ? "__DBLINT" : "_DBLINT@"; 2398 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2399 cinfo.retregs16 = mAX; 2400 cinfo.retregs32 = mAX; 2401 cinfo.flags = INFfloat; 2402 cinfo.push87 = 1; 2403 cinfo.pop87 = 1; 2404 break; 2405 } 2406 2407 case CLIB.intdbl: 2408 { 2409 const(char)* name = (config.exe & ex_unix) ? "__INTDBL" : "_INTDBL@"; 2410 s = symboly(name, I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2411 cinfo.retregs16 = DOUBLEREGS_16; 2412 cinfo.retregs32 = DOUBLEREGS_32; 2413 cinfo.flags = INFfloat; 2414 cinfo.push87 = 1; 2415 cinfo.pop87 = 1; 2416 break; 2417 } 2418 2419 case CLIB.dbluns: 2420 { 2421 const(char)* name = (config.exe & ex_unix) ? "__DBLUNS" : "_DBLUNS@"; 2422 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2423 cinfo.retregs16 = mAX; 2424 cinfo.retregs32 = mAX; 2425 cinfo.flags = INFfloat; 2426 cinfo.push87 = 1; 2427 cinfo.pop87 = 1; 2428 break; 2429 } 2430 2431 case CLIB.unsdbl: 2432 // Y(DOUBLEREGS_32,"__UNSDBL"), // CLIB.unsdbl 2433 // Y(DOUBLEREGS_16,"_UNSDBL@"), 2434 // {DOUBLEREGS_16,DOUBLEREGS_32,0,INFfloat,1,1}, // _UNSDBL@ unsdbl 2435 { 2436 const(char)* name = (config.exe & ex_unix) ? "__UNSDBL" : "_UNSDBL@"; 2437 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2438 cinfo.retregs16 = DOUBLEREGS_16; 2439 cinfo.retregs32 = DOUBLEREGS_32; 2440 cinfo.flags = INFfloat; 2441 cinfo.push87 = 1; 2442 cinfo.pop87 = 1; 2443 break; 2444 } 2445 2446 case CLIB.dblulng: 2447 { 2448 const(char)* name = (config.exe & ex_unix) ? "__DBLULNG" : "_DBLULNG@"; 2449 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2450 cinfo.retregs16 = mDX|mAX; 2451 cinfo.retregs32 = mAX; 2452 cinfo.flags = (config.exe & ex_unix) ? INFfloat | INF32 : INFfloat; 2453 cinfo.push87 = (config.exe & ex_unix) ? 0 : 1; 2454 cinfo.pop87 = 1; 2455 break; 2456 } 2457 2458 case CLIB.ulngdbl: 2459 { 2460 const(char)* name = (config.exe & ex_unix) ? "__ULNGDBL@" : "_ULNGDBL@"; 2461 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2462 cinfo.retregs16 = DOUBLEREGS_16; 2463 cinfo.retregs32 = DOUBLEREGS_32; 2464 cinfo.flags = INFfloat; 2465 cinfo.push87 = 1; 2466 cinfo.pop87 = 1; 2467 break; 2468 } 2469 2470 case CLIB.dblflt: 2471 { 2472 const(char)* name = (config.exe & ex_unix) ? "__DBLFLT" : "_DBLFLT@"; 2473 s = symboly(name, I16 ? 
DOUBLEREGS_16 : DOUBLEREGS_32); 2474 cinfo.retregs16 = FLOATREGS_16; 2475 cinfo.retregs32 = FLOATREGS_32; 2476 cinfo.flags = INFfloat; 2477 cinfo.push87 = 1; 2478 cinfo.pop87 = 1; 2479 break; 2480 } 2481 2482 case CLIB.fltdbl: 2483 { 2484 const(char)* name = (config.exe & ex_unix) ? "__FLTDBL" : "_FLTDBL@"; 2485 s = symboly(name, I16 ? ALLREGS : DOUBLEREGS_32); 2486 cinfo.retregs16 = DOUBLEREGS_16; 2487 cinfo.retregs32 = DOUBLEREGS_32; 2488 cinfo.flags = INFfloat; 2489 cinfo.push87 = 1; 2490 cinfo.pop87 = 1; 2491 break; 2492 } 2493 2494 case CLIB.dblllng: 2495 { 2496 const(char)* name = (config.exe & ex_unix) ? "__DBLLLNG" : "_DBLLLNG@"; 2497 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2498 cinfo.retregs16 = DOUBLEREGS_16; 2499 cinfo.retregs32 = mDX|mAX; 2500 cinfo.flags = INFfloat; 2501 cinfo.push87 = 1; 2502 cinfo.pop87 = 1; 2503 break; 2504 } 2505 2506 case CLIB.llngdbl: 2507 { 2508 const(char)* name = (config.exe & ex_unix) ? "__LLNGDBL" : "_LLNGDBL@"; 2509 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2510 cinfo.retregs16 = DOUBLEREGS_16; 2511 cinfo.retregs32 = DOUBLEREGS_32; 2512 cinfo.flags = INFfloat; 2513 cinfo.push87 = 1; 2514 cinfo.pop87 = 1; 2515 break; 2516 } 2517 2518 case CLIB.dblullng: 2519 { 2520 if (config.exe == EX_WIN64) 2521 { 2522 s = symboly("__DBLULLNG", DOUBLEREGS_32); 2523 cinfo.retregs32 = mAX; 2524 cinfo.flags = INFfloat; 2525 cinfo.push87 = 2; 2526 cinfo.pop87 = 2; 2527 } 2528 else 2529 { 2530 const(char)* name = (config.exe & ex_unix) ? "__DBLULLNG" : "_DBLULLNG@"; 2531 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2532 cinfo.retregs16 = DOUBLEREGS_16; 2533 cinfo.retregs32 = I64 ? mAX : mDX|mAX; 2534 cinfo.flags = INFfloat; 2535 cinfo.push87 = (config.exe & ex_unix) ? 2 : 1; 2536 cinfo.pop87 = (config.exe & ex_unix) ? 
2 : 1; 2537 } 2538 break; 2539 } 2540 2541 case CLIB.ullngdbl: 2542 { 2543 if (config.exe == EX_WIN64) 2544 { 2545 s = symboly("__ULLNGDBL", DOUBLEREGS_32); 2546 cinfo.retregs32 = mAX; 2547 cinfo.flags = INFfloat; 2548 cinfo.push87 = 1; 2549 cinfo.pop87 = 1; 2550 } 2551 else 2552 { 2553 const(char)* name = (config.exe & ex_unix) ? "__ULLNGDBL" : "_ULLNGDBL@"; 2554 s = symboly(name, I16 ? DOUBLEREGS_16 : DOUBLEREGS_32); 2555 cinfo.retregs16 = DOUBLEREGS_16; 2556 cinfo.retregs32 = I64 ? mAX : DOUBLEREGS_32; 2557 cinfo.flags = INFfloat; 2558 cinfo.push87 = 1; 2559 cinfo.pop87 = 1; 2560 } 2561 break; 2562 } 2563 2564 case CLIB.dtst: 2565 { 2566 const(char)* name = (config.exe & ex_unix) ? "__DTST" : "_DTST@"; 2567 s = symboly(name, 0); 2568 cinfo.flags = INFfloat; 2569 break; 2570 } 2571 2572 case CLIB.vptrfptr: 2573 { 2574 const(char)* name = (config.exe & ex_unix) ? "__HTOFPTR" : "_HTOFPTR@"; 2575 s = symboly(name, mES|mBX); 2576 cinfo.retregs16 = mES|mBX; 2577 cinfo.retregs32 = mES|mBX; 2578 break; 2579 } 2580 2581 case CLIB.cvptrfptr: 2582 { 2583 const(char)* name = (config.exe & ex_unix) ? "__HCTOFPTR" : "_HCTOFPTR@"; 2584 s = symboly(name, mES|mBX); 2585 cinfo.retregs16 = mES|mBX; 2586 cinfo.retregs32 = mES|mBX; 2587 break; 2588 } 2589 2590 case CLIB._87topsw: 2591 { 2592 const(char)* name = (config.exe & ex_unix) ? "__87TOPSW" : "_87TOPSW@"; 2593 s = symboly(name, 0); 2594 cinfo.flags = INFfloat; 2595 break; 2596 } 2597 2598 case CLIB.fltto87: 2599 { 2600 const(char)* name = (config.exe & ex_unix) ? "__FLTTO87" : "_FLTTO87@"; 2601 s = symboly(name, mST0); 2602 cinfo.retregs16 = mST0; 2603 cinfo.retregs32 = mST0; 2604 cinfo.flags = INFfloat; 2605 cinfo.push87 = 1; 2606 break; 2607 } 2608 2609 case CLIB.dblto87: 2610 { 2611 const(char)* name = (config.exe & ex_unix) ? 
"__DBLTO87" : "_DBLTO87@"; 2612 s = symboly(name, mST0); 2613 cinfo.retregs16 = mST0; 2614 cinfo.retregs32 = mST0; 2615 cinfo.flags = INFfloat; 2616 cinfo.push87 = 1; 2617 break; 2618 } 2619 2620 case CLIB.dblint87: 2621 { 2622 const(char)* name = (config.exe & ex_unix) ? "__DBLINT87" : "_DBLINT87@"; 2623 s = symboly(name, mST0|mAX); 2624 cinfo.retregs16 = mAX; 2625 cinfo.retregs32 = mAX; 2626 cinfo.flags = INFfloat; 2627 break; 2628 } 2629 2630 case CLIB.dbllng87: 2631 { 2632 const(char)* name = (config.exe & ex_unix) ? "__DBLLNG87" : "_DBLLNG87@"; 2633 s = symboly(name, mST0|mAX|mDX); 2634 cinfo.retregs16 = mDX|mAX; 2635 cinfo.retregs32 = mAX; 2636 cinfo.flags = INFfloat; 2637 break; 2638 } 2639 2640 case CLIB.ftst: 2641 { 2642 const(char)* name = (config.exe & ex_unix) ? "__FTST" : "_FTST@"; 2643 s = symboly(name, 0); 2644 cinfo.flags = INFfloat; 2645 break; 2646 } 2647 2648 case CLIB.fcompp: 2649 { 2650 const(char)* name = (config.exe & ex_unix) ? "__FCOMPP" : "_FCOMPP@"; 2651 s = symboly(name, 0); 2652 cinfo.retregs16 = mPSW; 2653 cinfo.retregs32 = mPSW; 2654 cinfo.flags = INFfloat; 2655 cinfo.pop87 = 2; 2656 break; 2657 } 2658 2659 case CLIB.ftest: 2660 { 2661 const(char)* name = (config.exe & ex_unix) ? "__FTEST" : "_FTEST@"; 2662 s = symboly(name, 0); 2663 cinfo.retregs16 = mPSW; 2664 cinfo.retregs32 = mPSW; 2665 cinfo.flags = INFfloat; 2666 break; 2667 } 2668 2669 case CLIB.ftest0: 2670 { 2671 const(char)* name = (config.exe & ex_unix) ? "__FTEST0" : "_FTEST0@"; 2672 s = symboly(name, 0); 2673 cinfo.retregs16 = mPSW; 2674 cinfo.retregs32 = mPSW; 2675 cinfo.flags = INFfloat; 2676 break; 2677 } 2678 2679 case CLIB.fdiv87: 2680 { 2681 const(char)* name = (config.exe & ex_unix) ? 
"__FDIVP" : "_FDIVP"; 2682 s = symboly(name, mST0|mAX|mBX|mCX|mDX); 2683 cinfo.retregs16 = mST0; 2684 cinfo.retregs32 = mST0; 2685 cinfo.flags = INFfloat; 2686 cinfo.push87 = 1; 2687 cinfo.pop87 = 1; 2688 break; 2689 } 2690 2691 // Complex numbers 2692 case CLIB.cmul: 2693 { 2694 s = symboly("_Cmul", mST0|mST01); 2695 cinfo.retregs16 = mST01; 2696 cinfo.retregs32 = mST01; 2697 cinfo.flags = INF32|INFfloat; 2698 cinfo.push87 = 3; 2699 cinfo.pop87 = 5; 2700 break; 2701 } 2702 2703 case CLIB.cdiv: 2704 { 2705 s = symboly("_Cdiv", mAX|mCX|mDX|mST0|mST01); 2706 cinfo.retregs16 = mST01; 2707 cinfo.retregs32 = mST01; 2708 cinfo.flags = INF32|INFfloat; 2709 cinfo.push87 = 0; 2710 cinfo.pop87 = 2; 2711 break; 2712 } 2713 2714 case CLIB.ccmp: 2715 { 2716 s = symboly("_Ccmp", mAX|mST0|mST01); 2717 cinfo.retregs16 = mPSW; 2718 cinfo.retregs32 = mPSW; 2719 cinfo.flags = INF32|INFfloat; 2720 cinfo.push87 = 0; 2721 cinfo.pop87 = 4; 2722 break; 2723 } 2724 2725 case CLIB.u64_ldbl: 2726 { 2727 const(char)* name = (config.exe & ex_unix) ? "__U64_LDBL" : "_U64_LDBL"; 2728 s = symboly(name, mST0); 2729 cinfo.retregs16 = mST0; 2730 cinfo.retregs32 = mST0; 2731 cinfo.flags = INF32|INF64|INFfloat; 2732 cinfo.push87 = 2; 2733 cinfo.pop87 = 1; 2734 break; 2735 } 2736 2737 case CLIB.ld_u64: 2738 { 2739 const(char)* name = (config.exe & ex_unix) ? (config.objfmt == OBJ_ELF || 2740 config.objfmt == OBJ_MACH ? 2741 "__LDBLULLNG" : "___LDBLULLNG") 2742 : "__LDBLULLNG"; 2743 s = symboly(name, mST0|mAX|mDX); 2744 cinfo.retregs16 = 0; 2745 cinfo.retregs32 = mDX|mAX; 2746 cinfo.flags = INF32|INF64|INFfloat; 2747 cinfo.push87 = 1; 2748 cinfo.pop87 = 2; 2749 break; 2750 } 2751 2752 default: 2753 assert(0); 2754 } 2755 clibsyms[clib] = s; 2756 } 2757 2758 *ps = s; 2759 *pinfo = cinfo; 2760 } 2761 2762 /******************************** 2763 * Generate code sequence to call C runtime library support routine. 2764 * clib = CLIB.xxxx 2765 * keepmask = mask of registers not to destroy. 
Currently can
 *              handle only 1. Should use a temporary rather than
 *              push/pop for speed.
 *      cdb = code builder that receives the generated instructions
 *      e = expression being evaluated; handed on to fixresult()
 *      pretregs = registers wanted for the result; handed on to fixresult()
 */

void callclib(ref CodeBuilder cdb, elem* e, uint clib, regm_t* pretregs, regm_t keepmask)
{
    //printf("callclib(e = %p, clib = %d, *pretregs = %s, keepmask = %s\n", e, clib, regm_str(*pretregs), regm_str(keepmask));
    //elem_print(e);

    // Fetch the routine's symbol and the record describing how to call it
    Symbol* sym;
    ClibInfo* info;
    getClibInfo(clib, &sym, &info);

    // Routines flagged INF32/INF64 must not be requested when generating 16 bit code
    assert(!I16 || !(info.flags & (INF32 | INF64)));

    // Registers the routine does not preserve, minus the ones the caller wants kept
    const regm_t clobbered = ~sym.Sregsaved & (mES | mBP | ALLREGS);
    getregs(cdb, clobbered & ~keepmask);        // mask of regs destroyed

    // Whatever the routine preserves itself needs no save/restore around the call
    keepmask &= ~sym.Sregsaved;
    const int nsaved = numbitsset(keepmask);
    CodeBuilder cdbrestore;
    cdbrestore.ctor();
    gensaverestore(keepmask, cdb, cdbrestore);

    // Apply the push87/pop87 counts recorded for this routine
    save87regs(cdb, info.push87);
    foreach (k; 0 .. info.push87)
        push87(cdb);
    foreach (k; 0 .. info.pop87)
        pop87();

    const bool inlineLmul = config.target_cpu >= TARGET_80386 && clib == CLIB.lmul && !I32;
    if (!inlineLmul)
    {
        makeitextern(sym);

        const int wantEbx = (info.flags & INFpushebx) != 0;
        const int wantAll = (info.flags & INFpusheabcdx) != 0;
        int adjust = 0;         // bytes to take back off the stack after the call

        if (STACKALIGN >= 16)
        {
            // Align the stack (assume no args on stack)
            const int pending = (nsaved + wantEbx + 4 * wantAll) * REGSIZE + stackpush;
            const int misfit = pending & (STACKALIGN - 1);
            if (misfit)
            {
                adjust = STACKALIGN - misfit;
                cod3_stackadj(cdb, adjust);
            }
        }

        if (wantEbx)
        {
            const bool fourRegs =
                (config.exe & (EX_LINUX | EX_LINUX64 | EX_FREEBSD | EX_FREEBSD64 | EX_DRAGONFLYBSD64)) != 0;
            if (fourRegs)
            {
                cdb.gen1(0x50 + CX);            // PUSH ECX
                cdb.gen1(0x50 + BX);            // PUSH EBX
                cdb.gen1(0x50 + DX);            // PUSH EDX
                cdb.gen1(0x50 + AX);            // PUSH EAX
                adjust += 4 * REGSIZE;
            }
            else
            {
                cdb.gen1(0x50 + BX);            // PUSH EBX
                adjust += REGSIZE;
            }
        }

        if (wantAll)
        {
            cdb.gen1(0x50 + CX);                // PUSH ECX
            cdb.gen1(0x50 + BX);                // PUSH EBX
            cdb.gen1(0x50 + DX);                // PUSH EDX
            cdb.gen1(0x50 + AX);                // PUSH EAX
        }

        // Note: not for OSX
        /* Pass EBX on the stack instead, this is because EBX is used
         * for shared library function calls
         */
        if ((config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) &&
            (config.flags3 & CFG3pic))
        {
            load_localgot(cdb);                 // EBX gets set to this value
        }

        cdb.gencs(LARGECODE ? 0x9A : 0xE8, 0, FLfunc, sym);    // CALL sym
        if (adjust)
            cod3_stackadj(cdb, -adjust);
        calledafunc = 1;

        version (SCPP)
        {
            if (I16 &&                                  // bug in Optlink for weak references
                config.flags3 & CFG3wkfloat &&
                (info.flags & (INFfloat | INFwkdone)) == INFfloat)
            {
                info.flags |= INFwkdone;
                makeitextern(getRtlsym(RTLSYM_INTONLY));
                objmod.wkext(sym, getRtlsym(RTLSYM_INTONLY));
            }
        }
    }
    else
    {
        // Emit the multiply as raw instruction bytes instead of calling the routine
        static immutable ubyte[23] lmul =
        [
            0x66,0xc1,0xe1,0x10,        // shl  ECX,16
            0x8b,0xcb,                  // mov  CX,BX           ;ECX = CX,BX
            0x66,0xc1,0xe0,0x10,        // shl  EAX,16
            0x66,0x0f,0xac,0xd0,0x10,   // shrd EAX,EDX,16      ;EAX = DX,AX
            0x66,0xf7,0xe1,             // mul  ECX
            0x66,0x0f,0xa4,0xc2,0x10,   // shld EDX,EAX,16      ;DX,AX = EAX
        ];

        cdb.genasm(cast(char*)lmul.ptr, lmul.sizeof);
    }

    // Pick the result registers for the current code size
    regm_t retregs;
    if (I16)
    {
        stackpush -= info.pop;
        retregs = info.retregs16;
    }
    else
        retregs = info.retregs32;

    cdb.append(cdbrestore);                     // restore the registers saved above
    fixresult(cdb, e, retregs, pretregs);
}


/*************************************************
 * Helper function for converting OPparam's into array of Parameters.
*/
struct Parameter
{
    elem* e;            // the argument expression
    reg_t reg;          // register the argument is passed in, NOREG if it goes on the stack
    reg_t reg2;         // second register when the argument occupies a pair, else NOREG
    uint numalign;      // padding bytes needed to align a stack argument
}

//void fillParameters(elem* e, Parameter* parameters, int* pi);

/*************************************************
 * Flatten a tree of OPparam nodes into parameters[], visiting E1 before E2.
 * The OPparam nodes themselves are freed; every other node becomes one entry.
 * Params:
 *      e = OPparam tree, or a single argument expression
 *      parameters = array to fill; the caller must make it large enough
 *      pi = index of the next free slot in parameters[], advanced for each entry stored
 */
void fillParameters(elem* e, Parameter* parameters, int* pi)
{
    if (e.Eoper == OPparam)
    {
        fillParameters(e.EV.E1, parameters, pi);
        fillParameters(e.EV.E2, parameters, pi);
        freenode(e);
    }
    else
    {
        parameters[*pi].e = e;
        (*pi)++;
    }
}

/***********************************
 * Create the state used to hand out argument registers for one function call.
 *
 * The register lists depend on the target:
 *  - 16 bit: no argument registers
 *  - 32 bit: AX for TYjfunc, CX for TYmfunc, otherwise none; no float registers
 *  - Win64: CX,DX,R8,R9 and XMM0..XMM3
 *  - other 64 bit: DI,SI,DX,CX,R8,R9 and XMM0..XMM7
 * Params:
 *      tyf = type of the function
 * Returns:
 *      the initialized FuncParamRegs
 */
FuncParamRegs FuncParamRegs_create(tym_t tyf)
{
    FuncParamRegs result;

    result.tyf = tyf;

    if (I16)
    {
        result.numintegerregs = 0;
        result.numfloatregs = 0;
    }
    else if (I32)
    {
        if (tyf == TYjfunc)
        {
            static immutable ubyte[1] reglist1 = [ AX ];
            result.argregs = &reglist1[0];
            result.numintegerregs = reglist1.length;
        }
        else if (tyf == TYmfunc)
        {
            static immutable ubyte[1] reglist2 = [ CX ];
            result.argregs = &reglist2[0];
            result.numintegerregs = reglist2.length;
        }
        else
            result.numintegerregs = 0;
        result.numfloatregs = 0;
    }
    else if (I64 && config.exe == EX_WIN64)
    {
        static immutable ubyte[4] reglist3 = [ CX,DX,R8,R9 ];
        result.argregs = &reglist3[0];
        result.numintegerregs = reglist3.length;

        static immutable ubyte[4] freglist3 = [ XMM0, XMM1, XMM2, XMM3 ];
        result.floatregs = &freglist3[0];
        result.numfloatregs = freglist3.length;
    }
    else if (I64)
    {
        static immutable ubyte[6] reglist4 = [ DI,SI,DX,CX,R8,R9 ];
        result.argregs = &reglist4[0];
        result.numintegerregs = reglist4.length;

        static immutable ubyte[8] freglist4 = [ XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7 ];
        result.floatregs = &freglist4[0];
        result.numfloatregs = freglist4.length;
    }
    else
        assert(0);
    return result;
}

/*****************************************
 * Determine whether a parameter of type (t, ty) is eligible to be passed
 * in a single integer argument register.
 *
 * Floating point types never are. A struct or array qualifies when it is
 * no larger than a near pointer and, except on Win64, its size is exactly
 * 1, 2, 4 or 8 bytes. Any other type qualifies when it is no larger than
 * a near pointer.
 * Params:
 *      t = type, valid only if ty is TYstruct or TYarray
 *      ty = type code of the parameter
 * Returns:
 *      true if the parameter may go into an integer register
 */

//bool type_jparam2(type* t, tym_t ty);

private bool type_jparam2(type* t, tym_t ty)
{
    ty = tybasic(ty);

    if (tyfloating(ty))
        return false;
    else if (ty == TYstruct || ty == TYarray)
    {
        type_debug(t);
        targ_size_t sz = type_size(t);
        return (sz <= _tysize[TYnptr]) &&
               (config.exe == EX_WIN64 || sz == 1 || sz == 2 || sz == 4 || sz == 8);
    }
    else if (tysize(ty) <= _tysize[TYnptr])
        return true;
    return false;
}

/*****************************************
 * Allocate parameter of type t and ty to registers *preg1 and *preg2.
 *
 * fpr.i is incremented for every parameter except zero size structs.
 * fpr.regcnt and fpr.xmmcnt are advanced only when registers are actually
 * handed out; a partially allocated pair is rolled back.
 * Params:
 *      fpr = allocation state created by FuncParamRegs_create()
 *      t = type, valid only if ty is TYstruct or TYarray
 *      ty = type code of the parameter
 *      preg1 = receives the (first) allocated register, NOREG if none
 *      preg2 = receives the second register of a pair, NOREG if none
 * Returns:
 *      0       not allocated to any register
 *      1       *preg1, *preg2 set to allocated register pair
 */
int FuncParamRegs_alloc(ref FuncParamRegs fpr, type* t, tym_t ty, reg_t* preg1, reg_t* preg2)
{
    //printf("FuncParamRegs::alloc(ty: TY%sm t: %p)\n", tystring[tybasic(ty)], t);
    //if (t) type_print(t);

    *preg1 = NOREG;
    *preg2 = NOREG;

    type* t2 = null;
    tym_t ty2 = TYMAX;

    tym_t tyb = tybasic(ty);

    // Treat array of 1 the same as its element type
    // (Don't put volatile parameters in registers)
    if (tyb == TYarray && tybasic(t.Tty) == TYarray && t.Tdim == 1 && !(t.Tty & mTYvolatile))
    {
        t = t.Tnext;
        tyb = tybasic(t.Tty);
    }

    if (tyb == TYstruct && type_zeroSize(t, fpr.tyf))
        return 0;               // don't allocate into registers

    ++fpr.i;

    // If struct just wraps another type
    if (tyb == TYstruct && tybasic(t.Tty) == TYstruct)
    {
        if (config.exe == EX_WIN64)
        {
            /* Structs occupy a general purpose register, regardless of the struct
             * size or the number & types of its fields.
             */
            t = null;
            ty = TYnptr;
        }
        else
        {
            type* targ1 = t.Ttag.Sstruct.Sarg1type;
            type* targ2 = t.Ttag.Sstruct.Sarg2type;
            if (targ1)
            {
                t = targ1;
                ty = t.Tty;
                if (targ2)
                {
                    t2 = targ2;
                    ty2 = t2.Tty;
                }
            }
            else if (I64 && !targ2)
                return 0;
        }
    }

    reg_t* preg = preg1;
    // Remember the counters so a half-allocated pair can be undone
    int regcntsave = fpr.regcnt;
    int xmmcntsave = fpr.xmmcnt;

    if (config.exe == EX_WIN64)
    {
        if (tybasic(ty) == TYcfloat)
        {
            ty = TYnptr;                // treat like a struct
        }
    }
    else if (I64)
    {
        if ((tybasic(ty) == TYcent || tybasic(ty) == TYucent) &&
            fpr.numintegerregs - fpr.regcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.argregs[fpr.regcnt];
            *preg2 = fpr.argregs[fpr.regcnt + 1];
            fpr.regcnt += 2;
            return 1;
        }

        if (tybasic(ty) == TYcdouble &&
            fpr.numfloatregs - fpr.xmmcnt >= 2)
        {
            // Allocate to register pair
            *preg1 = fpr.floatregs[fpr.xmmcnt];
            *preg2 = fpr.floatregs[fpr.xmmcnt + 1];
            fpr.xmmcnt += 2;
            return 1;
        }
    }

    // First pass allocates (t, ty) to *preg1, second pass (t2, ty2) to *preg2
    for (int j = 0; j < 2; j++)
    {
        if (fpr.regcnt < fpr.numintegerregs)
        {
            // On 32 bit only the first allocated parameter of TYjfunc/TYmfunc is eligible
            if ((I64 || (fpr.i == 1 && (fpr.tyf == TYjfunc || fpr.tyf == TYmfunc))) &&
                type_jparam2(t, ty))
            {
                *preg = fpr.argregs[fpr.regcnt];
                ++fpr.regcnt;
                if (config.exe == EX_WIN64)
                    ++fpr.xmmcnt;       // Win64 advances both counters together
                goto Lnext;
            }
        }
        if (fpr.xmmcnt < fpr.numfloatregs)
        {
            if (tyxmmreg(ty))
            {
                *preg = fpr.floatregs[fpr.xmmcnt];
                if (config.exe == EX_WIN64)
                    ++fpr.regcnt;       // Win64 advances both counters together
                ++fpr.xmmcnt;
                goto Lnext;
            }
        }
        // Failed to allocate to a register
        if (j == 1)
        {   /* Unwind first preg1 assignment, because it's both or nothing
             */
            *preg1 = NOREG;
            fpr.regcnt = regcntsave;
            fpr.xmmcnt = xmmcntsave;
        }
        return 0;

     Lnext:
        if (!t2)
            break;
        preg = preg2;
        t = t2;
        ty = ty2;
    }
    return 1;
}

/*******************************
 * Generate code sequence for function call.
 *
 * Decides for each argument whether it is passed in register(s) or on the
 * stack, generates the code that evaluates the arguments into place, and
 * then calls funccall() to generate the call itself.
 * Params:
 *      cdb = code builder that receives the generated code
 *      e = the call expression; e.EV.E1 is the function and, for binary
 *          operators, e.EV.E2 holds the arguments
 *      pretregs = where the return value goes; passed on to funccall()
 */

void cdfunc(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    //printf("cdfunc()\n"); elem_print(e);
    assert(e);
    uint numpara = 0;               // bytes of parameters
    uint numalign = 0;              // bytes to align stack before pushing parameters
    uint stackpushsave = stackpush;            // so we can compute # of parameters
    cgstate.stackclean++;
    regm_t keepmsk = 0;
    int xmmcnt = 0;
    tym_t tyf = tybasic(e.EV.E1.Ety);        // the function type

    // Easier to deal with parameters as an array: parameters[0..np]
    int np = OTbinary(e.Eoper) ? el_nparams(e.EV.E2) : 0;
    Parameter *parameters = cast(Parameter *)alloca(np * Parameter.sizeof);

    if (np)
    {
        int n = 0;
        fillParameters(e.EV.E2, parameters, &n);
        assert(n == np);
    }

    Symbol *sf = null;                  // symbol of the function being called
    if (e.EV.E1.Eoper == OPvar)
        sf = e.EV.E1.EV.Vsym;

    /* Assume called function access statics
     */
    if (config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_FREEBSD | EX_FREEBSD64) &&
        config.flags3 & CFG3pic)
        cgstate.accessedTLS = true;

    /* Special handling for call to __tls_get_addr, we must save registers
     * before evaluating the parameter, so that the parameter load and call
     * are adjacent.
     */
    if (np == 1 && sf)
    {
        if (sf == tls_get_addr_sym)
            getregs(cdb, ~sf.Sregsaved & (mBP | ALLREGS | mES | XMMREGS));
    }

    uint stackalign = REGSIZE;
    if (tyf == TYf16func)
        stackalign = 2;
    // Figure out which parameters go in registers.
    // Compute numpara, the total bytes pushed on the stack
    FuncParamRegs fpr = FuncParamRegs_create(tyf);
    for (int i = np; --i >= 0;)
    {
        elem *ep = parameters[i].e;
        uint psize = cast(uint)_align(stackalign, paramsize(ep, tyf));     // align on stack boundary
        if (config.exe == EX_WIN64)
        {
            //printf("[%d] size = %u, numpara = %d ep = %p ", i, psize, numpara, ep); WRTYxx(ep.Ety); printf("\n");
            debug
            if (psize > REGSIZE) elem_print(e);

            assert(psize <= REGSIZE);
            psize = REGSIZE;
        }
        //printf("[%d] size = %u, numpara = %d ", i, psize, numpara); WRTYxx(ep.Ety); printf("\n");
        if (FuncParamRegs_alloc(fpr, ep.ET, ep.Ety, &parameters[i].reg, &parameters[i].reg2))
        {
            if (config.exe == EX_WIN64)
                numpara += REGSIZE;             // allocate stack space for it anyway
            continue;   // goes in register, not stack
        }

        // Parameter i goes on the stack
        parameters[i].reg = NOREG;
        uint alignsize = el_alignsize(ep);
        parameters[i].numalign = 0;
        if (alignsize > stackalign &&
            (I64 || (alignsize >= 16 &&
                (config.exe & (EX_OSX | EX_LINUX) && (tyaggregate(ep.Ety) || tyvector(ep.Ety))))))
        {
            if (alignsize > STACKALIGN)
            {
                STACKALIGN = alignsize;
                enforcealign = true;
            }
            uint newnumpara = (numpara + (alignsize - 1)) & ~(alignsize - 1);
            parameters[i].numalign = newnumpara - numpara;
            numpara = newnumpara;
            assert(config.exe != EX_WIN64);
        }
        numpara += psize;
    }

    if (config.exe == EX_WIN64)
    {
        if (numpara < 4 * REGSIZE)
            numpara = 4 * REGSIZE;
    }

    //printf("numpara = %d, stackpush = %d\n", numpara, stackpush);
    assert((numpara & (REGSIZE - 1)) == 0);
    assert((stackpush & (REGSIZE - 1)) == 0);

    /* Should consider reordering the order of evaluation of the parameters
     * so that args that go into registers are evaluated after args that get
     * pushed. We can reorder args that are constants or relconst's.
     */

    /* Determine if we should use cgstate.funcarg for the parameters or push them
     */
    bool usefuncarg = false;
    static if (0)
    {
    printf("test1 %d %d %d %d %d %d %d %d\n", (config.flags4 & CFG4speed)!=0, !Alloca.size,
        !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)),
        cast(int)numpara, !stackpush,
        (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos),
        (!typfunc(tyf) || sf && sf.Sflags & SFLexit), !I16);
    }
    if (config.flags4 & CFG4speed &&
        !Alloca.size &&
        /* The cleanup code calls a local function, leaving the return address on
         * the top of the stack. If parameters are placed there, the return address
         * is stepped on.
         * A better solution is turn this off only inside the cleanup code.
         */
        !usednteh &&
        !calledFinally &&
        (numpara || config.exe == EX_WIN64) &&
        stackpush == 0 &&               // cgstate.funcarg needs to be at top of stack
        (cgstate.funcargtos == ~0 || numpara < cgstate.funcargtos) &&
        (!(typfunc(tyf) || tyf == TYhfunc) || sf && sf.Sflags & SFLexit) &&
        !anyiasm && !I16
       )
    {
        // Stack parameters built by these operators rule out cgstate.funcarg
        for (int i = 0; i < np; i++)
        {
            elem* ep = parameters[i].e;
            int preg = parameters[i].reg;
            //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
            if (preg == NOREG)
            {
                switch (ep.Eoper)
                {
                    case OPstrctor:
                    case OPstrthis:
                    case OPstrpar:
                    case OPnp_fp:
                        goto Lno;

                    default:
                        break;
                }
            }
        }

        if (numpara > cgstate.funcarg.size)
        {   // New high water mark
            //printf("increasing size from %d to %d\n", (int)cgstate.funcarg.size, (int)numpara);
            cgstate.funcarg.size = numpara;
        }
        usefuncarg = true;
    }
  Lno:

    /* Adjust start of the stack so after all args are pushed,
     * the stack will be aligned.
     */
    if (!usefuncarg && STACKALIGN >= 16 && (numpara + stackpush) & (STACKALIGN - 1))
    {
        numalign = STACKALIGN - ((numpara + stackpush) & (STACKALIGN - 1));
        cod3_stackadj(cdb, numalign);
        cdb.genadjesp(numalign);
        stackpush += numalign;
        stackpushsave += numalign;
    }
    assert(stackpush == stackpushsave);
    if (config.exe == EX_WIN64)
    {
        //printf("np = %d, numpara = %d, stackpush = %d\n", np, numpara, stackpush);
        assert(numpara == ((np < 4) ? 4 * REGSIZE : np * REGSIZE));

        // Allocate stack space for four entries anyway
        // http://msdn.microsoft.com/en-US/library/ew5tede7(v=vs.80)
    }

    CodeBuilder cdbrestore;             // collects the restores for registers saved below
    cdbrestore.ctor();
    regm_t saved = 0;                   // mask of registers saved so far
    targ_size_t funcargtossave = cgstate.funcargtos;
    targ_size_t funcargtos = numpara;
    //printf("funcargtos1 = %d\n", cast(int)funcargtos);

    /* Parameters go into the registers RDI,RSI,RDX,RCX,R8,R9
     * float and double parameters go into XMM0..XMM7
     * For variadic functions, count of XMM registers used goes in AL
     */
    for (int i = 0; i < np; i++)
    {
        elem* ep = parameters[i].e;
        int preg = parameters[i].reg;
        //printf("parameter[%d] = %d, np = %d\n", i, preg, np);
        if (preg == NOREG)
        {
            /* Push parameter on stack, but keep track of registers used
             * in the process. If they interfere with keepmsk, we'll have
             * to save/restore them.
             */
            CodeBuilder cdbsave;
            cdbsave.ctor();
            regm_t overlap = msavereg & keepmsk;
            msavereg |= keepmsk;
            CodeBuilder cdbparams;
            cdbparams.ctor();
            if (usefuncarg)
                movParams(cdbparams, ep, stackalign, cast(uint)funcargtos, tyf);
            else
                pushParams(cdbparams,ep,stackalign, tyf);
            regm_t tosave = keepmsk & ~msavereg;
            msavereg &= ~keepmsk | overlap;

            // tosave is the mask to save and restore
            for (reg_t j = 0; tosave; j++)
            {
                regm_t mi = mask(j);
                assert(j <= XMM7);
                if (mi & tosave)
                {
                    uint idx;
                    regsave.save(cdbsave, j, &idx);
                    regsave.restore(cdbrestore, j, idx);
                    saved |= mi;
                    keepmsk &= ~mi;             // don't need to keep these for rest of params
                    tosave &= ~mi;
                }
            }

            // The saves must come before the code that evaluates the parameter
            cdb.append(cdbsave);
            cdb.append(cdbparams);

            // Alignment for parameter comes after it got pushed
            const uint numalignx = parameters[i].numalign;
            if (usefuncarg)
            {
                funcargtos -= _align(stackalign, paramsize(ep, tyf)) + numalignx;
                cgstate.funcargtos = funcargtos;
            }
            else if (numalignx)
            {
                cod3_stackadj(cdb, numalignx);
                cdb.genadjesp(numalignx);
                stackpush += numalignx;
            }
        }
        else
        {
            // Goes in register preg, not stack
            regm_t retregs = mask(preg);
            if (retregs & XMMREGS)
                ++xmmcnt;
            int preg2 = parameters[i].reg2;
            reg_t mreg,lreg;
            if (preg2 != NOREG)
            {
                // BUG: still doesn't handle case of mXMM0|mAX or mAX|mXMM0
                assert(ep.Eoper != OPstrthis);
                if (mask(preg2) & XMMREGS)
                {
                    ++xmmcnt;
                    lreg = XMM0;
                    mreg = XMM1;
                }
                else
                {
                    lreg = mask(preg ) & mLSW ? cast(reg_t)preg  : AX;
                    mreg = mask(preg2) & mMSW ? cast(reg_t)preg2 : DX;
                }
                retregs = mask(mreg) | mask(lreg);

                CodeBuilder cdbsave;
                cdbsave.ctor();
                if (keepmsk & retregs)
                {
                    regm_t tosave = keepmsk & retregs;

                    // tosave is the mask to save and restore
                    for (reg_t j = 0; tosave; j++)
                    {
                        regm_t mi = mask(j);
                        assert(j <= XMM7);
                        if (mi & tosave)
                        {
                            uint idx;
                            regsave.save(cdbsave, j, &idx);
                            regsave.restore(cdbrestore, j, idx);
                            saved |= mi;
                            keepmsk &= ~mi;             // don't need to keep these for rest of params
                            tosave &= ~mi;
                        }
                    }
                }
                cdb.append(cdbsave);

                scodelem(cdb, ep, &retregs, keepmsk, false);

                // Move result [mreg,lreg] into parameter registers from [preg2,preg]
                retregs = 0;
                if (preg != lreg)
                    retregs |= mask(preg);
                if (preg2 != mreg)
                    retregs |= mask(preg2);
                getregs(cdb,retregs);

                tym_t ty1 = tybasic(ep.Ety);
                tym_t ty2 = ty1;
                if (ty1 == TYstruct)
                {
                    type* targ1 = ep.ET.Ttag.Sstruct.Sarg1type;
                    type* targ2 = ep.ET.Ttag.Sstruct.Sarg2type;
                    if (targ1)
                        ty1 = targ1.Tty;
                    if (targ2)
                        ty2 = targ2.Tty;
                }
                else if (tyrelax(ty1) == TYcent)
                    ty1 = ty2 = TYllong;
                else if (tybasic(ty1) == TYcdouble)
                    ty1 = ty2 = TYdouble;

                // Emit both moves; (preg != mreg) selects which of the two goes first
                foreach (v; 0 .. 2)
                {
                    if (v ^ (preg != mreg))
                        genmovreg(cdb, preg, lreg, ty1);
                    else
                        genmovreg(cdb, preg2, mreg, ty2);
                }

                retregs = mask(preg) | mask(preg2);
            }
            else if (ep.Eoper == OPstrthis)
            {
                getregs(cdb,retregs);
                // LEA preg,np[RSP]
                uint delta = stackpush - ep.EV.Vuns;   // stack delta to parameter
                cdb.genc1(LEA,
                        (modregrm(0,4,SP) << 8) | modregxrm(2,preg,4), FLconst,delta);
                if (I64)
                    code_orrex(cdb.last(), REX_W);
            }
            else if (ep.Eoper == OPstrpar && config.exe == EX_WIN64 && type_size(ep.ET) == 0)
            {
                // zero size struct parameter: no code is generated for it
            }
            else
            {
                scodelem(cdb, ep, &retregs, keepmsk, false);
            }
            keepmsk |= retregs;      // don't change preg when evaluating func address
        }
    }

    if (config.exe == EX_WIN64)
    {   // Allocate stack space for four entries anyway
        // http://msdn.microsoft.com/en-US/library/ew5tede7(v=vs.80)
        {   uint sz = 4 * REGSIZE;
            if (usefuncarg)
            {
                funcargtos -= sz;
                cgstate.funcargtos = funcargtos;
            }
            else
            {
                cod3_stackadj(cdb, sz);
                cdb.genadjesp(sz);
                stackpush += sz;
            }
        }

        /* Variadic functions store XMM parameters into their corresponding GP registers
         */
        for (int i = 0; i < np; i++)
        {
            int preg = parameters[i].reg;
            regm_t retregs = mask(preg);
            if (retregs & XMMREGS)
            {
                reg_t reg;
                switch (preg)
                {
                    case XMM0: reg = CX; break;
                    case XMM1: reg = DX; break;
                    case XMM2: reg = R8; break;
                    case XMM3: reg = R9; break;

                    default:   assert(0);
                }
                getregs(cdb,mask(reg));
                cdb.gen2(STOD,(REX_W << 16) | modregxrmx(3,preg-XMM0,reg)); // MOVD reg,preg
            }
        }
    }

    // Restore any register parameters we saved
    getregs(cdb,saved);
    cdb.append(cdbrestore);
    keepmsk |= saved;

    // Variadic functions store the number of XMM registers used in AL
    if (I64 && config.exe != EX_WIN64 && e.Eflags & EFLAGS_variadic)
    {
        getregs(cdb,mAX);
        movregconst(cdb,AX,xmmcnt,1);
        keepmsk |= mAX;
    }

    //printf("funcargtos2 = %d\n", (int)funcargtos);
    assert(!usefuncarg || (funcargtos == 0 && cgstate.funcargtos == 0));
    cgstate.stackclean--;

    debug
    if (!usefuncarg && numpara != stackpush - stackpushsave)
    {
        printf("function %s\n", funcsym_p.Sident.ptr);
        printf("numpara = %d, stackpush = %d, stackpushsave = %d\n", numpara, stackpush, stackpushsave);
        elem_print(e);
    }

    assert(usefuncarg || numpara == stackpush - stackpushsave);

    funccall(cdb,e,numpara,numalign,pretregs,keepmsk,usefuncarg);
    cgstate.funcargtos = funcargtossave;
}

/***********************************
 * Generate code for e by loading a stack-relative address into a register:
 * LEA reg,(stackpush - e.EV.Vuns)[ESP].
 * Params:
 *      cdb = code builder that receives the generated code
 *      e = the expression; its size must equal REGSIZE
 *      pretregs = registers wanted for the result
 */

void cdstrthis(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    assert(tysize(e.Ety) == REGSIZE);
    const reg = findreg(*pretregs & allregs);
    getregs(cdb,mask(reg));
    // LEA reg,np[ESP]
    uint np = stackpush - e.EV.Vuns;        // stack delta to parameter
    cdb.genc1(LEA,(modregrm(0,4,SP) << 8) | modregxrm(2,reg,4),FLconst,np);
    if (I64)
        code_orrex(cdb.last(), REX_W);      // 64 bit operand size
    fixresult(cdb, e, mask(reg), pretregs);
}

/******************************
 * Call function. All parameters have already been pushed onto the stack.
3580 * Params: 3581 * e = function call 3582 * numpara = size in bytes of all the parameters 3583 * numalign = amount the stack was aligned by before the parameters were pushed 3584 * pretregs = where return value goes 3585 * keepmsk = registers to not change when evaluating the function address 3586 * usefuncarg = using cgstate.funcarg, so no need to adjust stack after func return 3587 */ 3588 3589 private void funccall(ref CodeBuilder cdb, elem* e, uint numpara, uint numalign, 3590 regm_t* pretregs,regm_t keepmsk, bool usefuncarg) 3591 { 3592 //printf("%s ", funcsym_p.Sident.ptr); 3593 //printf("funccall(e = %p, *pretregs = %s, numpara = %d, numalign = %d, usefuncarg=%d)\n",e,regm_str(*pretregs),numpara,numalign,usefuncarg); 3594 calledafunc = 1; 3595 // Determine if we need frame for function prolog/epilog 3596 3597 static if (TARGET_WINDOS) 3598 { 3599 if (config.memmodel == Vmodel) 3600 { 3601 if (tyfarfunc(funcsym_p.ty())) 3602 needframe = true; 3603 } 3604 } 3605 3606 code cs; 3607 regm_t retregs; 3608 Symbol* s; 3609 3610 elem* e1 = e.EV.E1; 3611 tym_t tym1 = tybasic(e1.Ety); 3612 char farfunc = tyfarfunc(tym1) || tym1 == TYifunc; 3613 3614 CodeBuilder cdbe; 3615 cdbe.ctor(); 3616 3617 if (e1.Eoper == OPvar) 3618 { // Call function directly 3619 3620 if (!tyfunc(tym1)) 3621 WRTYxx(tym1); 3622 assert(tyfunc(tym1)); 3623 s = e1.EV.Vsym; 3624 if (s.Sflags & SFLexit) 3625 { } 3626 else if (s != tls_get_addr_sym) 3627 save87(cdb); // assume 8087 regs are all trashed 3628 3629 // Function calls may throw Errors, unless marked that they don't 3630 if (s == funcsym_p || !s.Sfunc || !(s.Sfunc.Fflags3 & Fnothrow)) 3631 funcsym_p.Sfunc.Fflags3 &= ~Fnothrow; 3632 3633 if (s.Sflags & SFLexit) 3634 { 3635 // Function doesn't return, so don't worry about registers 3636 // it may use 3637 } 3638 else if (!tyfunc(s.ty()) || !(config.flags4 & CFG4optimized)) 3639 // so we can replace func at runtime 3640 getregs(cdbe,~fregsaved & (mBP | ALLREGS | mES | XMMREGS)); 3641 else 
3642 getregs(cdbe,~s.Sregsaved & (mBP | ALLREGS | mES | XMMREGS)); 3643 if (strcmp(s.Sident.ptr, "alloca") == 0) 3644 { 3645 s = getRtlsym(RTLSYM_ALLOCA); 3646 makeitextern(s); 3647 int areg = CX; 3648 if (config.exe == EX_WIN64) 3649 areg = DX; 3650 getregs(cdbe, mask(areg)); 3651 cdbe.genc(LEA, modregrm(2, areg, BPRM), FLallocatmp, 0, 0, 0); // LEA areg,&localsize[BP] 3652 if (I64) 3653 code_orrex(cdbe.last(), REX_W); 3654 Alloca.size = REGSIZE; 3655 } 3656 if (sytab[s.Sclass] & SCSS) // if function is on stack (!) 3657 { 3658 retregs = allregs & ~keepmsk; 3659 s.Sflags &= ~GTregcand; 3660 s.Sflags |= SFLread; 3661 cdrelconst(cdbe,e1,&retregs); 3662 if (farfunc) 3663 { 3664 const reg = findregmsw(retregs); 3665 const lsreg = findreglsw(retregs); 3666 floatreg = true; // use float register 3667 reflocal = true; 3668 cdbe.genc1(0x89, // MOV floatreg+2,reg 3669 modregrm(2, reg, BPRM), FLfltreg, REGSIZE); 3670 cdbe.genc1(0x89, // MOV floatreg,lsreg 3671 modregrm(2, lsreg, BPRM), FLfltreg, 0); 3672 if (tym1 == TYifunc) 3673 cdbe.gen1(0x9C); // PUSHF 3674 cdbe.genc1(0xFF, // CALL [floatreg] 3675 modregrm(2, 3, BPRM), FLfltreg, 0); 3676 } 3677 else 3678 { 3679 const reg = findreg(retregs); 3680 cdbe.gen2(0xFF, modregrmx(3, 2, reg)); // CALL reg 3681 if (I64) 3682 code_orrex(cdbe.last(), REX_W); 3683 } 3684 } 3685 else 3686 { 3687 int fl = FLfunc; 3688 if (!tyfunc(s.ty())) 3689 fl = el_fl(e1); 3690 if (tym1 == TYifunc) 3691 cdbe.gen1(0x9C); // PUSHF 3692 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 3693 { 3694 assert(!farfunc); 3695 if (s != tls_get_addr_sym) 3696 { 3697 //printf("call %s\n", s.Sident.ptr); 3698 load_localgot(cdb); 3699 cdbe.gencs(0xE8, 0, fl, s); // CALL extern 3700 } 3701 else if (I64) 3702 { 3703 /* Prepend 66 66 48 so GNU linker has patch room 3704 */ 3705 assert(!farfunc); 3706 cdbe.gen1(0x66); 3707 cdbe.gen1(0x66); 3708 cdbe.gencs(0xE8, 0, fl, s); // CALL extern 3709 cdbe.last().Irex = REX | 
REX_W; 3710 } 3711 else 3712 cdbe.gencs(0xE8, 0, fl, s); // CALL extern 3713 } 3714 else 3715 { 3716 cdbe.gencs(farfunc ? 0x9A : 0xE8,0,fl,s); // CALL extern 3717 } 3718 code_orflag(cdbe.last(), farfunc ? (CFseg | CFoff) : (CFselfrel | CFoff)); 3719 } 3720 } 3721 else 3722 { // Call function via pointer 3723 3724 // Function calls may throw Errors 3725 funcsym_p.Sfunc.Fflags3 &= ~Fnothrow; 3726 3727 if (e1.Eoper != OPind) { WRFL(cast(FL)el_fl(e1)); WROP(e1.Eoper); } 3728 save87(cdb); // assume 8087 regs are all trashed 3729 assert(e1.Eoper == OPind); 3730 elem *e11 = e1.EV.E1; 3731 tym_t e11ty = tybasic(e11.Ety); 3732 assert(!I16 || (e11ty == (farfunc ? TYfptr : TYnptr))); 3733 load_localgot(cdb); 3734 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 3735 { 3736 if (config.flags3 & CFG3pic && I32) 3737 keepmsk |= mBX; 3738 } 3739 3740 /* Mask of registers destroyed by the function call 3741 */ 3742 regm_t desmsk = (mBP | ALLREGS | mES | XMMREGS) & ~fregsaved; 3743 3744 // if we can't use loadea() 3745 if ((!OTleaf(e11.Eoper) || e11.Eoper == OPconst) && 3746 (e11.Eoper != OPind || e11.Ecount)) 3747 { 3748 retregs = allregs & ~keepmsk; 3749 cgstate.stackclean++; 3750 scodelem(cdbe,e11,&retregs,keepmsk,true); 3751 cgstate.stackclean--; 3752 // Kill registers destroyed by an arbitrary function call 3753 getregs(cdbe,desmsk); 3754 if (e11ty == TYfptr) 3755 { 3756 const reg = findregmsw(retregs); 3757 const lsreg = findreglsw(retregs); 3758 floatreg = true; // use float register 3759 reflocal = true; 3760 cdbe.genc1(0x89, // MOV floatreg+2,reg 3761 modregrm(2, reg, BPRM), FLfltreg, REGSIZE); 3762 cdbe.genc1(0x89, // MOV floatreg,lsreg 3763 modregrm(2, lsreg, BPRM), FLfltreg, 0); 3764 if (tym1 == TYifunc) 3765 cdbe.gen1(0x9C); // PUSHF 3766 cdbe.genc1(0xFF, // CALL [floatreg] 3767 modregrm(2, 3, BPRM), FLfltreg, 0); 3768 } 3769 else 3770 { 3771 const reg = findreg(retregs); 3772 cdbe.gen2(0xFF, modregrmx(3, 2, reg)); // 
CALL reg 3773 if (I64) 3774 code_orrex(cdbe.last(), REX_W); 3775 } 3776 } 3777 else 3778 { 3779 if (tym1 == TYifunc) 3780 cdb.gen1(0x9C); // PUSHF 3781 // CALL [function] 3782 cs.Iflags = 0; 3783 cgstate.stackclean++; 3784 loadea(cdbe, e11, &cs, 0xFF, farfunc ? 3 : 2, 0, keepmsk, desmsk); 3785 cgstate.stackclean--; 3786 freenode(e11); 3787 } 3788 s = null; 3789 } 3790 cdb.append(cdbe); 3791 freenode(e1); 3792 3793 /* See if we will need the frame pointer. 3794 Calculate it here so we can possibly use BP to fix the stack. 3795 */ 3796 static if (0) 3797 { 3798 if (!needframe) 3799 { 3800 // If there is a register available for this basic block 3801 if (config.flags4 & CFG4optimized && (ALLREGS & ~regcon.used)) 3802 { } 3803 else 3804 { 3805 for (SYMIDX si = 0; si < globsym.top; si++) 3806 { 3807 Symbol* s = globsym.tab[si]; 3808 3809 if (s.Sflags & GTregcand && type_size(s.Stype) != 0) 3810 { 3811 if (config.flags4 & CFG4optimized) 3812 { // If symbol is live in this basic block and 3813 // isn't already in a register 3814 if (s.Srange && vec_testbit(dfoidx, s.Srange) && 3815 s.Sfl != FLreg) 3816 { // Then symbol must be allocated on stack 3817 needframe = true; 3818 break; 3819 } 3820 } 3821 else 3822 { if (mfuncreg == 0) // if no registers left 3823 { needframe = true; 3824 break; 3825 } 3826 } 3827 } 3828 } 3829 } 3830 } 3831 } 3832 3833 retregs = regmask(e.Ety, tym1); 3834 3835 if (!usefuncarg) 3836 { 3837 // If stack needs cleanup 3838 if (s && s.Sflags & SFLexit) 3839 { 3840 if (config.fulltypes && TARGET_WINDOS) 3841 { 3842 // the stack walker evaluates the return address, not a byte of the 3843 // call instruction, so ensure there is an instruction byte after 3844 // the call that still has the same line number information 3845 cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3); 3846 } 3847 /* Function never returns, so don't need to generate stack 3848 * cleanup code. But still need to log the stack cleanup 3849 * as if it did return. 
3850 */ 3851 cdb.genadjesp(-(numpara + numalign)); 3852 stackpush -= numpara + numalign; 3853 } 3854 else if ((OTbinary(e.Eoper) || config.exe == EX_WIN64) && 3855 (!typfunc(tym1) || config.exe == EX_WIN64)) 3856 { 3857 if (tym1 == TYhfunc) 3858 { // Hidden parameter is popped off by the callee 3859 cdb.genadjesp(-REGSIZE); 3860 stackpush -= REGSIZE; 3861 if (numpara + numalign > REGSIZE) 3862 genstackclean(cdb, numpara + numalign - REGSIZE, retregs); 3863 } 3864 else 3865 genstackclean(cdb, numpara + numalign, retregs); 3866 } 3867 else 3868 { 3869 cdb.genadjesp(-numpara); // popped off by the callee's 'RET numpara' 3870 stackpush -= numpara; 3871 if (numalign) // callee doesn't know about alignment adjustment 3872 genstackclean(cdb,numalign,retregs); 3873 } 3874 } 3875 3876 /* Special handling for functions which return a floating point 3877 value in the top of the 8087 stack. 3878 */ 3879 3880 if (retregs & mST0) 3881 { 3882 cdb.genadjfpu(1); 3883 if (*pretregs) // if we want the result 3884 { 3885 //assert(global87.stackused == 0); 3886 push87(cdb); // one item on 8087 stack 3887 fixresult87(cdb,e,retregs,pretregs); 3888 return; 3889 } 3890 else 3891 // Pop unused result off 8087 stack 3892 cdb.gen2(0xDD, modregrm(3, 3, 0)); // FPOP 3893 } 3894 else if (retregs & mST01) 3895 { 3896 cdb.genadjfpu(2); 3897 if (*pretregs) // if we want the result 3898 { 3899 assert(global87.stackused == 0); 3900 push87(cdb); 3901 push87(cdb); // two items on 8087 stack 3902 fixresult_complex87(cdb, e, retregs, pretregs); 3903 return; 3904 } 3905 else 3906 { 3907 // Pop unused result off 8087 stack 3908 cdb.gen2(0xDD, modregrm(3, 3, 0)); // FPOP 3909 cdb.gen2(0xDD, modregrm(3, 3, 0)); // FPOP 3910 } 3911 } 3912 3913 fixresult(cdb, e, retregs, pretregs); 3914 } 3915 3916 /*************************** 3917 * Determine size of argument e that will be pushed. 
3918 */ 3919 3920 targ_size_t paramsize(elem* e, tym_t tyf) 3921 { 3922 assert(e.Eoper != OPparam); 3923 targ_size_t szb; 3924 tym_t tym = tybasic(e.Ety); 3925 if (tyscalar(tym)) 3926 szb = size(tym); 3927 else if (tym == TYstruct || tym == TYarray) 3928 szb = type_parameterSize(e.ET, tyf); 3929 else 3930 { 3931 WRTYxx(tym); 3932 assert(0); 3933 } 3934 return szb; 3935 } 3936 3937 /*************************** 3938 * Generate code to move argument e on the stack. 3939 */ 3940 3941 private void movParams(ref CodeBuilder cdb, elem* e, uint stackalign, uint funcargtos, tym_t tyf) 3942 { 3943 //printf("movParams(e = %p, stackalign = %d, funcargtos = %d)\n", e, stackalign, funcargtos); 3944 //printf("movParams()\n"); elem_print(e); 3945 assert(!I16); 3946 assert(e && e.Eoper != OPparam); 3947 3948 tym_t tym = tybasic(e.Ety); 3949 if (tyfloating(tym)) 3950 objmod.fltused(); 3951 3952 int grex = I64 ? REX_W << 16 : 0; 3953 3954 targ_size_t szb = paramsize(e, tyf); // size before alignment 3955 targ_size_t sz = _align(stackalign, szb); // size after alignment 3956 assert((sz & (stackalign - 1)) == 0); // ensure that alignment worked 3957 assert((sz & (REGSIZE - 1)) == 0); 3958 //printf("szb = %d sz = %d\n", (int)szb, (int)sz); 3959 3960 code cs; 3961 cs.Iflags = 0; 3962 cs.Irex = 0; 3963 switch (e.Eoper) 3964 { 3965 case OPstrctor: 3966 case OPstrthis: 3967 case OPstrpar: 3968 case OPnp_fp: 3969 assert(0); 3970 3971 case OPrelconst: 3972 { 3973 int fl; 3974 if (!evalinregister(e) && 3975 !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) && 3976 ((fl = el_fl(e)) == FLdata || fl == FLudata || fl == FLextern) 3977 ) 3978 { 3979 // MOV -stackoffset[EBP],&variable 3980 cs.Iop = 0xC7; 3981 cs.Irm = modregrm(2,0,BPRM); 3982 if (I64 && sz == 8) 3983 cs.Irex |= REX_W; 3984 cs.IFL1 = FLfuncarg; 3985 cs.IEV1.Voffset = funcargtos - REGSIZE; 3986 cs.IEV2.Voffset = e.EV.Voffset; 3987 cs.IFL2 = cast(ubyte)fl; 3988 cs.IEV2.Vsym = e.EV.Vsym; 3989 cs.Iflags |= CFoff; 3990 
cdb.gen(&cs); 3991 return; 3992 } 3993 break; 3994 } 3995 3996 case OPconst: 3997 if (!evalinregister(e)) 3998 { 3999 cs.Iop = (sz == 1) ? 0xC6 : 0xC7; 4000 cs.Irm = modregrm(2,0,BPRM); 4001 cs.IFL1 = FLfuncarg; 4002 cs.IEV1.Voffset = funcargtos - sz; 4003 cs.IFL2 = FLconst; 4004 targ_size_t *p = cast(targ_size_t *) &(e.EV); 4005 cs.IEV2.Vsize_t = *p; 4006 if (I64 && tym == TYcldouble) 4007 // The alignment of EV.Vcldouble is not the same on the compiler 4008 // as on the target 4009 goto Lbreak; 4010 if (I64 && sz >= 8) 4011 { 4012 int i = cast(int)sz; 4013 do 4014 { 4015 if (*p >= 0x80000000) 4016 { // Use 64 bit register MOV, as the 32 bit one gets sign extended 4017 // MOV reg,imm64 4018 // MOV EA,reg 4019 goto Lbreak; 4020 } 4021 p = cast(targ_size_t *)(cast(char *) p + REGSIZE); 4022 i -= REGSIZE; 4023 } while (i > 0); 4024 p = cast(targ_size_t *) &(e.EV); 4025 } 4026 4027 int i = cast(int)sz; 4028 do 4029 { int regsize = REGSIZE; 4030 regm_t retregs = (sz == 1) ? BYTEREGS : allregs; 4031 reg_t reg; 4032 if (reghasvalue(retregs,*p,®)) 4033 { 4034 cs.Iop = (cs.Iop & 1) | 0x88; 4035 cs.Irm |= modregrm(0, reg & 7, 0); // MOV EA,reg 4036 if (reg & 8) 4037 cs.Irex |= REX_R; 4038 if (I64 && sz == 1 && reg >= 4) 4039 cs.Irex |= REX; 4040 } 4041 if (I64 && sz >= 8) 4042 cs.Irex |= REX_W; 4043 cdb.gen(&cs); // MOV EA,const 4044 4045 p = cast(targ_size_t *)(cast(char *) p + regsize); 4046 cs.Iop = 0xC7; 4047 cs.Irm &= cast(ubyte)~cast(int)modregrm(0, 7, 0); 4048 cs.Irex &= ~REX_R; 4049 cs.IEV1.Voffset += regsize; 4050 cs.IEV2.Vint = cast(targ_int)*p; 4051 i -= regsize; 4052 } while (i > 0); 4053 return; 4054 } 4055 4056 Lbreak: 4057 break; 4058 4059 default: 4060 break; 4061 } 4062 regm_t retregs = tybyte(tym) ? 
BYTEREGS : allregs; 4063 if (tyvector(tym)) 4064 { 4065 retregs = XMMREGS; 4066 codelem(cdb, e, &retregs, false); 4067 const op = xmmstore(tym); 4068 const r = findreg(retregs); 4069 cdb.genc1(op, modregxrm(2, r - XMM0, BPRM), FLfuncarg, funcargtos - 16); // MOV funcarg[EBP],r 4070 checkSetVex(cdb.last(),tym); 4071 return; 4072 } 4073 else if (tyfloating(tym)) 4074 { 4075 if (config.inline8087) 4076 { 4077 retregs = tycomplex(tym) ? mST01 : mST0; 4078 codelem(cdb, e, &retregs, false); 4079 4080 opcode_t op; 4081 uint r; 4082 switch (tym) 4083 { 4084 case TYfloat: 4085 case TYifloat: 4086 case TYcfloat: 4087 op = 0xD9; 4088 r = 3; 4089 break; 4090 4091 case TYdouble: 4092 case TYidouble: 4093 case TYdouble_alias: 4094 case TYcdouble: 4095 op = 0xDD; 4096 r = 3; 4097 break; 4098 4099 case TYldouble: 4100 case TYildouble: 4101 case TYcldouble: 4102 op = 0xDB; 4103 r = 7; 4104 break; 4105 4106 default: 4107 assert(0); 4108 } 4109 if (tycomplex(tym)) 4110 { 4111 // FSTP sz/2[ESP] 4112 cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz/2); 4113 pop87(); 4114 } 4115 pop87(); 4116 cdb.genc1(op, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - sz); // FSTP -sz[EBP] 4117 return; 4118 } 4119 } 4120 scodelem(cdb, e, &retregs, 0, true); 4121 if (sz <= REGSIZE) 4122 { 4123 uint r = findreg(retregs); 4124 cdb.genc1(0x89, modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE); // MOV -REGSIZE[EBP],r 4125 if (sz == 8) 4126 code_orrex(cdb.last(), REX_W); 4127 } 4128 else if (sz == REGSIZE * 2) 4129 { 4130 uint r = findregmsw(retregs); 4131 cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE); // MOV -REGSIZE[EBP],r 4132 r = findreglsw(retregs); 4133 cdb.genc1(0x89, grex | modregxrm(2, r, BPRM), FLfuncarg, funcargtos - REGSIZE * 2); // MOV -2*REGSIZE[EBP],r 4134 } 4135 else 4136 assert(0); 4137 } 4138 4139 4140 /*************************** 4141 * Generate code to push argument e on the stack. 
4142 * stackpush is incremented by stackalign for each PUSH. 4143 */ 4144 4145 void pushParams(ref CodeBuilder cdb, elem* e, uint stackalign, tym_t tyf) 4146 { 4147 //printf("params(e = %p, stackalign = %d)\n", e, stackalign); 4148 //printf("params()\n"); elem_print(e); 4149 stackchanged = 1; 4150 assert(e && e.Eoper != OPparam); 4151 4152 tym_t tym = tybasic(e.Ety); 4153 if (tyfloating(tym)) 4154 objmod.fltused(); 4155 4156 int grex = I64 ? REX_W << 16 : 0; 4157 4158 targ_size_t szb = paramsize(e, tyf); // size before alignment 4159 targ_size_t sz = _align(stackalign,szb); // size after alignment 4160 assert((sz & (stackalign - 1)) == 0); // ensure that alignment worked 4161 assert((sz & (REGSIZE - 1)) == 0); 4162 4163 switch (e.Eoper) 4164 { 4165 version (SCPP) 4166 { 4167 case OPstrctor: 4168 { 4169 elem* e1 = e.EV.E1; 4170 docommas(cdb,&e1); // skip over any comma expressions 4171 4172 cod3_stackadj(cdb, sz); 4173 stackpush += sz; 4174 cdb.genadjesp(sz); 4175 4176 // Find OPstrthis and set it to stackpush 4177 exp2_setstrthis(e1, null, stackpush, null); 4178 4179 regm_t retregs = 0; 4180 codelem(cdb, e1, &retregs, true); 4181 freenode(e); 4182 return; 4183 } 4184 case OPstrthis: 4185 // This is the parameter for the 'this' pointer corresponding to 4186 // OPstrctor. We push a pointer to an object that was already 4187 // allocated on the stack by OPstrctor. 
4188 { 4189 regm_t retregs = allregs; 4190 reg_t reg; 4191 allocreg(cdb, &retregs, ®, TYoffset); 4192 genregs(cdb, 0x89, SP, reg); // MOV reg,SP 4193 if (I64) 4194 code_orrex(cdb.last(), REX_W); 4195 uint np = stackpush - e.EV.Vuns; // stack delta to parameter 4196 cdb.genc2(0x81, grex | modregrmx(3, 0, reg), np); // ADD reg,np 4197 if (sz > REGSIZE) 4198 { 4199 cdb.gen1(0x16); // PUSH SS 4200 stackpush += REGSIZE; 4201 } 4202 cdb.gen1(0x50 + (reg & 7)); // PUSH reg 4203 if (reg & 8) 4204 code_orrex(cdb.last(), REX_B); 4205 stackpush += REGSIZE; 4206 cdb.genadjesp(sz); 4207 freenode(e); 4208 return; 4209 } 4210 } 4211 4212 case OPstrpar: 4213 { 4214 uint rm; 4215 4216 elem* e1 = e.EV.E1; 4217 if (sz == 0) 4218 { 4219 docommas(cdb, &e1); // skip over any commas 4220 freenode(e); 4221 return; 4222 } 4223 if ((sz & 3) == 0 && (sz / REGSIZE) <= 4 && e1.Eoper == OPvar) 4224 { 4225 freenode(e); 4226 e = e1; 4227 goto L1; 4228 } 4229 docommas(cdb,&e1); // skip over any commas 4230 code_flags_t seg = 0; // assume no seg override 4231 regm_t retregs = sz ? 
IDXREGS : 0; 4232 bool doneoff = false; 4233 uint pushsize = REGSIZE; 4234 uint op16 = 0; 4235 if (!I16 && sz & 2) // if odd number of words to push 4236 { 4237 pushsize = 2; 4238 op16 = 1; 4239 } 4240 else if (I16 && config.target_cpu >= TARGET_80386 && (sz & 3) == 0) 4241 { 4242 pushsize = 4; // push DWORDs at a time 4243 op16 = 1; 4244 } 4245 uint npushes = cast(uint)(sz / pushsize); 4246 switch (e1.Eoper) 4247 { 4248 case OPind: 4249 if (sz) 4250 { 4251 switch (tybasic(e1.EV.E1.Ety)) 4252 { 4253 case TYfptr: 4254 case TYhptr: 4255 seg = CFes; 4256 retregs |= mES; 4257 break; 4258 4259 case TYsptr: 4260 if (config.wflags & WFssneds) 4261 seg = CFss; 4262 break; 4263 4264 case TYfgPtr: 4265 if (I32) 4266 seg = CFgs; 4267 else if (I64) 4268 seg = CFfs; 4269 else 4270 assert(0); 4271 break; 4272 4273 case TYcptr: 4274 seg = CFcs; 4275 break; 4276 4277 default: 4278 break; 4279 } 4280 } 4281 codelem(cdb, e1.EV.E1, &retregs, false); 4282 freenode(e1); 4283 break; 4284 4285 case OPvar: 4286 /* Symbol is no longer a candidate for a register */ 4287 e1.EV.Vsym.Sflags &= ~GTregcand; 4288 4289 if (!e1.Ecount && npushes > 4) 4290 { 4291 /* Kludge to point at last word in struct. */ 4292 /* Don't screw up CSEs. 
*/ 4293 e1.EV.Voffset += sz - pushsize; 4294 doneoff = true; 4295 } 4296 //if (LARGEDATA) /* if default isn't DS */ 4297 { 4298 static immutable uint[4] segtocf = [ CFes,CFcs,CFss,0 ]; 4299 4300 int fl = el_fl(e1); 4301 if (fl == FLfardata) 4302 { 4303 seg = CFes; 4304 retregs |= mES; 4305 } 4306 else 4307 { 4308 uint s = segfl[fl]; 4309 assert(s < 4); 4310 seg = segtocf[s]; 4311 if (seg == CFss && !(config.wflags & WFssneds)) 4312 seg = 0; 4313 } 4314 } 4315 if (e1.Ety & mTYfar) 4316 { 4317 seg = CFes; 4318 retregs |= mES; 4319 } 4320 cdrelconst(cdb, e1, &retregs); 4321 // Reverse the effect of the previous add 4322 if (doneoff) 4323 e1.EV.Voffset -= sz - pushsize; 4324 freenode(e1); 4325 break; 4326 4327 case OPstreq: 4328 //case OPcond: 4329 if (!(config.exe & EX_flat)) 4330 { 4331 seg = CFes; 4332 retregs |= mES; 4333 } 4334 codelem(cdb, e1, &retregs, false); 4335 break; 4336 4337 case OPpair: 4338 case OPrpair: 4339 pushParams(cdb, e1, stackalign, tyf); 4340 freenode(e); 4341 return; 4342 4343 default: 4344 elem_print(e1); 4345 assert(0); 4346 } 4347 reg_t reg = findreglsw(retregs); 4348 rm = I16 ? regtorm[reg] : regtorm32[reg]; 4349 if (op16) 4350 seg |= CFopsize; // operand size 4351 if (npushes <= 4) 4352 { 4353 assert(!doneoff); 4354 for (; npushes > 1; --npushes) 4355 { 4356 cdb.genc1(0xFF, buildModregrm(2, 6, rm), FLconst, pushsize * (npushes - 1)); // PUSH [reg] 4357 code_orflag(cdb.last(),seg); 4358 cdb.genadjesp(pushsize); 4359 } 4360 cdb.gen2(0xFF,buildModregrm(0, 6, rm)); // PUSH [reg] 4361 cdb.last().Iflags |= seg; 4362 cdb.genadjesp(pushsize); 4363 } 4364 else if (sz) 4365 { 4366 getregs_imm(cdb, mCX | retregs); 4367 // MOV CX,sz/2 4368 movregconst(cdb, CX, npushes, 0); 4369 if (!doneoff) 4370 { // This should be done when 4371 // reg is loaded. 
Fix later 4372 // ADD reg,sz-pushsize 4373 cdb.genc2(0x81, grex | modregrmx(3, 0, reg), sz-pushsize); 4374 } 4375 getregs(cdb,mCX); // the LOOP decrements it 4376 cdb.gen2(0xFF, buildModregrm(0, 6, rm)); // PUSH [reg] 4377 cdb.last().Iflags |= seg | CFtarg2; 4378 code* c3 = cdb.last(); 4379 cdb.genc2(0x81,grex | buildModregrm(3, 5,reg), pushsize); // SUB reg,pushsize 4380 if (I16 || config.flags4 & CFG4space) 4381 genjmp(cdb,0xE2,FLcode,cast(block *)c3);// LOOP c3 4382 else 4383 { 4384 if (I64) 4385 cdb.gen2(0xFF, modregrm(3, 1, CX));// DEC CX 4386 else 4387 cdb.gen1(0x48 + CX); // DEC CX 4388 genjmp(cdb, JNE, FLcode, cast(block *)c3); // JNE c3 4389 } 4390 regimmed_set(CX,0); 4391 cdb.genadjesp(cast(int)sz); 4392 } 4393 stackpush += sz; 4394 freenode(e); 4395 return; 4396 } 4397 4398 case OPind: 4399 if (!e.Ecount) /* if *e1 */ 4400 { 4401 if (sz <= REGSIZE) 4402 { // Watch out for single byte quantities being up 4403 // against the end of a segment or in memory-mapped I/O 4404 if (!(config.exe & EX_flat) && szb == 1) 4405 break; 4406 goto L1; // can handle it with loadea() 4407 } 4408 4409 // Avoid PUSH MEM on the Pentium when optimizing for speed 4410 if (config.flags4 & CFG4speed && 4411 (config.target_cpu >= TARGET_80486 && 4412 config.target_cpu <= TARGET_PentiumMMX) && 4413 sz <= 2 * REGSIZE && 4414 !tyfloating(tym)) 4415 break; 4416 4417 if (tym == TYldouble || tym == TYildouble || tycomplex(tym)) 4418 break; 4419 4420 code cs; 4421 cs.Iflags = 0; 4422 cs.Irex = 0; 4423 if (I32) 4424 { 4425 assert(sz >= REGSIZE * 2); 4426 loadea(cdb, e, &cs, 0xFF, 6, sz - REGSIZE, 0, 0); // PUSH EA+4 4427 cdb.genadjesp(REGSIZE); 4428 stackpush += REGSIZE; 4429 sz -= REGSIZE; 4430 4431 if (sz > REGSIZE) 4432 { 4433 while (sz) 4434 { 4435 cs.IEV1.Voffset -= REGSIZE; 4436 cdb.gen(&cs); // PUSH EA+... 
4437 cdb.genadjesp(REGSIZE); 4438 stackpush += REGSIZE; 4439 sz -= REGSIZE; 4440 } 4441 freenode(e); 4442 return; 4443 } 4444 } 4445 else 4446 { 4447 if (sz == DOUBLESIZE) 4448 { 4449 loadea(cdb, e, &cs, 0xFF, 6, DOUBLESIZE - REGSIZE, 0, 0); // PUSH EA+6 4450 cs.IEV1.Voffset -= REGSIZE; 4451 cdb.gen(&cs); // PUSH EA+4 4452 cdb.genadjesp(REGSIZE); 4453 getlvalue_lsw(&cs); 4454 cdb.gen(&cs); // PUSH EA+2 4455 } 4456 else /* TYlong */ 4457 loadea(cdb, e, &cs, 0xFF, 6, REGSIZE, 0, 0); // PUSH EA+2 4458 cdb.genadjesp(REGSIZE); 4459 } 4460 stackpush += sz; 4461 getlvalue_lsw(&cs); 4462 cdb.gen(&cs); // PUSH EA 4463 cdb.genadjesp(REGSIZE); 4464 freenode(e); 4465 return; 4466 } 4467 break; 4468 4469 case OPnp_fp: 4470 if (!e.Ecount) /* if (far *)e1 */ 4471 { 4472 elem* e1 = e.EV.E1; 4473 tym_t tym1 = tybasic(e1.Ety); 4474 /* BUG: what about pointers to functions? */ 4475 int segreg; 4476 switch (tym1) 4477 { 4478 case TYnptr: segreg = 3<<3; break; 4479 case TYcptr: segreg = 1<<3; break; 4480 default: segreg = 2<<3; break; 4481 } 4482 if (I32 && stackalign == 2) 4483 cdb.gen1(0x66); // push a word 4484 cdb.gen1(0x06 + segreg); // PUSH SEGREG 4485 if (I32 && stackalign == 2) 4486 code_orflag(cdb.last(), CFopsize); // push a word 4487 cdb.genadjesp(stackalign); 4488 stackpush += stackalign; 4489 pushParams(cdb, e1, stackalign, tyf); 4490 freenode(e); 4491 return; 4492 } 4493 break; 4494 4495 case OPrelconst: 4496 static if (TARGET_SEGMENTED) 4497 { 4498 /* Determine if we can just push the segment register */ 4499 /* Test size of type rather than TYfptr because of (long)(&v) */ 4500 Symbol* s = e.EV.Vsym; 4501 //if (sytab[s.Sclass] & SCSS && !I32) // if variable is on stack 4502 // needframe = true; // then we need stack frame 4503 int fl; 4504 if (_tysize[tym] == tysize(TYfptr) && 4505 (fl = s.Sfl) != FLfardata && 4506 /* not a function that CS might not be the segment of */ 4507 (!((fl == FLfunc || s.ty() & mTYcs) && 4508 (s.Sclass == SCcomdat || s.Sclass == SCextern || 
s.Sclass == SCinline || config.wflags & WFthunk)) || 4509 (fl == FLfunc && config.exe == EX_DOSX) 4510 ) 4511 ) 4512 { 4513 stackpush += sz; 4514 cdb.gen1(0x06 + // PUSH SEGREG 4515 (((fl == FLfunc || s.ty() & mTYcs) ? 1 : segfl[fl]) << 3)); 4516 cdb.genadjesp(REGSIZE); 4517 4518 if (config.target_cpu >= TARGET_80286 && !e.Ecount) 4519 { 4520 getoffset(cdb, e, STACK); 4521 freenode(e); 4522 return; 4523 } 4524 else 4525 { 4526 regm_t retregs; 4527 offsetinreg(cdb, e, &retregs); 4528 const reg = findreg(retregs); 4529 genpush(cdb,reg); // PUSH reg 4530 cdb.genadjesp(REGSIZE); 4531 } 4532 return; 4533 } 4534 if (config.target_cpu >= TARGET_80286 && !e.Ecount) 4535 { 4536 stackpush += sz; 4537 if (_tysize[tym] == tysize(TYfptr)) 4538 { 4539 // PUSH SEG e 4540 cdb.gencs(0x68,0,FLextern,s); 4541 cdb.last().Iflags = CFseg; 4542 cdb.genadjesp(REGSIZE); 4543 } 4544 getoffset(cdb, e, STACK); 4545 freenode(e); 4546 return; 4547 } 4548 } 4549 break; /* else must evaluate expression */ 4550 4551 case OPvar: 4552 L1: 4553 if (config.flags4 & CFG4speed && 4554 (config.target_cpu >= TARGET_80486 && 4555 config.target_cpu <= TARGET_PentiumMMX) && 4556 sz <= 2 * REGSIZE && 4557 !tyfloating(tym)) 4558 { // Avoid PUSH MEM on the Pentium when optimizing for speed 4559 break; 4560 } 4561 else if (movOnly(e) || (tyxmmreg(tym) && config.fpxmmregs) || tyvector(tym)) 4562 break; // no PUSH MEM 4563 else 4564 { 4565 int regsize = REGSIZE; 4566 uint flag = 0; 4567 if (I16 && config.target_cpu >= TARGET_80386 && sz > 2 && 4568 !e.Ecount) 4569 { 4570 regsize = 4; 4571 flag |= CFopsize; 4572 } 4573 code cs; 4574 cs.Iflags = 0; 4575 cs.Irex = 0; 4576 loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0); // PUSH EA+sz-2 4577 code_orflag(cdb.last(), flag); 4578 cdb.genadjesp(REGSIZE); 4579 stackpush += sz; 4580 while (cast(targ_int)(sz -= regsize) > 0) 4581 { 4582 loadea(cdb, e, &cs, 0xFF, 6, sz - regsize, RMload, 0); 4583 code_orflag(cdb.last(), flag); 4584 cdb.genadjesp(REGSIZE); 4585 } 4586 
freenode(e); 4587 return; 4588 } 4589 4590 case OPconst: 4591 { 4592 char pushi = 0; 4593 uint flag = 0; 4594 int regsize = REGSIZE; 4595 4596 if (tycomplex(tym)) 4597 break; 4598 4599 if (I64 && tyfloating(tym) && sz > 4 && boolres(e)) 4600 // Can't push 64 bit non-zero args directly 4601 break; 4602 4603 if (I32 && szb == 10) // special case for long double constants 4604 { 4605 assert(sz == 12); 4606 targ_int value = e.EV.Vushort8[4]; // pick upper 2 bytes of Vldouble 4607 stackpush += sz; 4608 cdb.genadjesp(cast(int)sz); 4609 for (int i = 0; i < 3; ++i) 4610 { 4611 reg_t reg; 4612 if (reghasvalue(allregs, value, ®)) 4613 cdb.gen1(0x50 + reg); // PUSH reg 4614 else 4615 cdb.genc2(0x68,0,value); // PUSH value 4616 value = e.EV.Vulong4[i ^ 1]; // treat Vldouble as 2 element array of 32 bit uint 4617 } 4618 freenode(e); 4619 return; 4620 } 4621 4622 assert(I64 || sz <= tysize(TYldouble)); 4623 int i = cast(int)sz; 4624 if (!I16 && i == 2) 4625 flag = CFopsize; 4626 4627 if (config.target_cpu >= TARGET_80286) 4628 // && (e.Ecount == 0 || e.Ecount != e.Ecomsub)) 4629 { 4630 pushi = 1; 4631 if (I16 && config.target_cpu >= TARGET_80386 && i >= 4) 4632 { 4633 regsize = 4; 4634 flag = CFopsize; 4635 } 4636 } 4637 else if (i == REGSIZE) 4638 break; 4639 4640 stackpush += sz; 4641 cdb.genadjesp(cast(int)sz); 4642 targ_uns* pi = &e.EV.Vuns; // point to start of Vdouble 4643 targ_ushort* ps = cast(targ_ushort *) pi; 4644 targ_ullong* pl = cast(targ_ullong *)pi; 4645 i /= regsize; 4646 do 4647 { 4648 if (i) /* be careful not to go negative */ 4649 i--; 4650 4651 targ_size_t value; 4652 switch (regsize) 4653 { 4654 case 2: 4655 value = ps[i]; 4656 break; 4657 4658 case 4: 4659 if (tym == TYldouble || tym == TYildouble) 4660 /* The size is 10 bytes, and since we have 2 bytes left over, 4661 * just read those 2 bytes, not 4. 4662 * Otherwise we're reading uninitialized data. 4663 * I.e. read 4 bytes, 4 bytes, then 2 bytes 4664 */ 4665 value = i == 2 ? 
ps[4] : pi[i]; // 80 bits 4666 else 4667 value = pi[i]; 4668 break; 4669 4670 case 8: 4671 value = cast(targ_size_t)pl[i]; 4672 break; 4673 4674 default: 4675 assert(0); 4676 } 4677 4678 reg_t reg; 4679 if (pushi) 4680 { 4681 if (I64 && regsize == 8 && value != cast(int)value) 4682 { 4683 regwithvalue(cdb,allregs,value,®,64); 4684 goto Preg; // cannot push imm64 unless it is sign extended 32 bit value 4685 } 4686 if (regsize == REGSIZE && reghasvalue(allregs,value,®)) 4687 goto Preg; 4688 cdb.genc2((szb == 1) ? 0x6A : 0x68, 0, value); // PUSH value 4689 } 4690 else 4691 { 4692 regwithvalue(cdb, allregs, value, ®, 0); 4693 Preg: 4694 genpush(cdb,reg); // PUSH reg 4695 } 4696 code_orflag(cdb.last(), flag); // operand size 4697 } while (i); 4698 freenode(e); 4699 return; 4700 } 4701 4702 case OPpair: 4703 { 4704 if (e.Ecount) 4705 break; 4706 const op1 = e.EV.E1.Eoper; 4707 const op2 = e.EV.E2.Eoper; 4708 if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) && 4709 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst)) 4710 { 4711 pushParams(cdb, e.EV.E2, stackalign, tyf); 4712 pushParams(cdb, e.EV.E1, stackalign, tyf); 4713 freenode(e); 4714 } 4715 else if (tyfloating(e.EV.E1.Ety) || 4716 tyfloating(e.EV.E2.Ety)) 4717 { 4718 // Need special handling because of order of evaluation of e1 and e2 4719 break; 4720 } 4721 else 4722 { 4723 regm_t regs = allregs; 4724 codelem(cdb, e, ®s, false); 4725 genpush(cdb, findregmsw(regs)); // PUSH msreg 4726 genpush(cdb, findreglsw(regs)); // PUSH lsreg 4727 cdb.genadjesp(cast(int)sz); 4728 stackpush += sz; 4729 } 4730 return; 4731 } 4732 4733 case OPrpair: 4734 { 4735 if (e.Ecount) 4736 break; 4737 const op1 = e.EV.E1.Eoper; 4738 const op2 = e.EV.E2.Eoper; 4739 if ((op1 == OPvar || op1 == OPconst || op1 == OPrelconst) && 4740 (op2 == OPvar || op2 == OPconst || op2 == OPrelconst)) 4741 { 4742 pushParams(cdb, e.EV.E1, stackalign, tyf); 4743 pushParams(cdb, e.EV.E2, stackalign, tyf); 4744 freenode(e); 4745 } 4746 else if 
(tyfloating(e.EV.E1.Ety) || 4747 tyfloating(e.EV.E2.Ety)) 4748 { 4749 // Need special handling because of order of evaluation of e1 and e2 4750 break; 4751 } 4752 else 4753 { 4754 regm_t regs = allregs; 4755 codelem(cdb, e, ®s, false); 4756 genpush(cdb, findregmsw(regs)); // PUSH msreg 4757 genpush(cdb, findreglsw(regs)); // PUSH lsreg 4758 cdb.genadjesp(cast(int)sz); 4759 stackpush += sz; 4760 } 4761 return; 4762 } 4763 4764 default: 4765 break; 4766 } 4767 4768 regm_t retregs = tybyte(tym) ? BYTEREGS : allregs; 4769 if (tyvector(tym) || (tyxmmreg(tym) && config.fpxmmregs)) 4770 { 4771 regm_t retxmm = XMMREGS; 4772 codelem(cdb, e, &retxmm, false); 4773 stackpush += sz; 4774 cdb.genadjesp(cast(int)sz); 4775 cod3_stackadj(cdb, cast(int)sz); 4776 const op = xmmstore(tym); 4777 const r = findreg(retxmm); 4778 cdb.gen2sib(op, modregxrm(0, r - XMM0,4 ), modregrm(0, 4, SP)); // MOV [ESP],r 4779 checkSetVex(cdb.last(),tym); 4780 return; 4781 } 4782 else if (tyfloating(tym)) 4783 { 4784 if (config.inline8087) 4785 { 4786 retregs = tycomplex(tym) ? 
mST01 : mST0; 4787 codelem(cdb, e, &retregs, false); 4788 stackpush += sz; 4789 cdb.genadjesp(cast(int)sz); 4790 cod3_stackadj(cdb, cast(int)sz); 4791 opcode_t op; 4792 uint r; 4793 switch (tym) 4794 { 4795 case TYfloat: 4796 case TYifloat: 4797 case TYcfloat: 4798 op = 0xD9; 4799 r = 3; 4800 break; 4801 4802 case TYdouble: 4803 case TYidouble: 4804 case TYdouble_alias: 4805 case TYcdouble: 4806 op = 0xDD; 4807 r = 3; 4808 break; 4809 4810 case TYldouble: 4811 case TYildouble: 4812 case TYcldouble: 4813 op = 0xDB; 4814 r = 7; 4815 break; 4816 4817 default: 4818 assert(0); 4819 } 4820 if (!I16) 4821 { 4822 if (tycomplex(tym)) 4823 { 4824 // FSTP sz/2[ESP] 4825 cdb.genc1(op, (modregrm(0, 4, SP) << 8) | modregxrm(2, r, 4),FLconst, sz/2); 4826 pop87(); 4827 } 4828 pop87(); 4829 cdb.gen2sib(op, modregrm(0, r, 4),modregrm(0, 4, SP)); // FSTP [ESP] 4830 } 4831 else 4832 { 4833 retregs = IDXREGS; // get an index reg 4834 reg_t reg; 4835 allocreg(cdb, &retregs, ®, TYoffset); 4836 genregs(cdb, 0x89, SP, reg); // MOV reg,SP 4837 pop87(); 4838 cdb.gen2(op, modregrm(0, r, regtorm[reg])); // FSTP [reg] 4839 } 4840 if (LARGEDATA) 4841 cdb.last().Iflags |= CFss; // want to store into stack 4842 genfwait(cdb); // FWAIT 4843 return; 4844 } 4845 else if (I16 && (tym == TYdouble || tym == TYdouble_alias)) 4846 retregs = mSTACK; 4847 } 4848 else if (I16 && sz == 8) // if long long 4849 retregs = mSTACK; 4850 4851 scodelem(cdb,e,&retregs,0,true); 4852 if (retregs != mSTACK) // if stackpush not already inc'd 4853 stackpush += sz; 4854 if (sz <= REGSIZE) 4855 { 4856 genpush(cdb,findreg(retregs)); // PUSH reg 4857 cdb.genadjesp(cast(int)REGSIZE); 4858 } 4859 else if (sz == REGSIZE * 2) 4860 { 4861 genpush(cdb,findregmsw(retregs)); // PUSH msreg 4862 genpush(cdb,findreglsw(retregs)); // PUSH lsreg 4863 cdb.genadjesp(cast(int)sz); 4864 } 4865 } 4866 4867 /******************************* 4868 * Get offset portion of e, and store it in an index 4869 * register. 
Return mask of index register in *pretregs.
 */

void offsetinreg(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    regm_t retregs = mLSW;                      // want only offset
    if (e.Ecount && e.Ecount != e.Ecomsub)
    {
        // Look for a candidate register whose recorded CSE value is this very elem;
        // if one exists, claim it instead of computing the offset again.
        regm_t rm = retregs & regcon.cse.mval & ~regcon.cse.mops & ~regcon.mvar; /* possible regs */
        for (uint i = 0; rm; i++)
        {
            if (mask(i) & rm && regcon.cse.value[i] == e)
            {
                *pretregs = mask(i);
                getregs(cdb, *pretregs);
                goto L3;
            }
            rm &= ~mask(i);                     // register i examined, drop it from the candidates
        }
    }

    *pretregs = retregs;
    allocreg(cdb, pretregs, &reg, TYoffset);
    getoffset(cdb,e,reg);
L3:
    cssave(e, *pretregs,false);
    freenode(e);
}

/******************************
 * Generate code to load data into registers.
 *
 * Three broad cases are handled, in this order:
 *  1. struct and (inline 8087) floating point operands are handed off to
 *     cdrelconst(), load87() or cload87();
 *  2. `*pretregs == mPSW`: only the flags are wanted, so code is generated
 *     that compares/tests the operand and the function returns;
 *  3. otherwise the value is placed in registers selected from *pretregs,
 *     with separate paths for constants (OPconst) and for everything else.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = elem to load; must not be null
 *      pretregs = mask of where the result is wanted; if it is 0 nothing is
 *                 generated. May be modified (mPSW is cleared from it when the
 *                 flags were already set while loading).
 */


void loaddata(ref CodeBuilder cdb, elem* e, regm_t* pretregs)
{
    reg_t reg;
    reg_t nreg;
    reg_t sreg;
    opcode_t op;
    tym_t tym;
    code cs;
    regm_t flags, forregs, regm;

    debug
    {
//        if (debugw)
//            printf("loaddata(e = %p,*pretregs = %s)\n",e,regm_str(*pretregs));
//        elem_print(e);
    }

    assert(e);
    elem_debug(e);
    if (*pretregs == 0)
        return;
    tym = tybasic(e.Ety);
    if (tym == TYstruct)
    {
        cdrelconst(cdb,e,pretregs);
        return;
    }
    if (tyfloating(tym))
    {
        objmod.fltused();
        if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                load87(cdb, e, 0, pretregs, null, -1);
                return;
            }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }
        }
    }
    int sz = _tysize[tym];
    cs.Iflags = 0;
    cs.Irex = 0;
    if (*pretregs == mPSW)
    {
        /* Only the flags are wanted */
        Symbol *s;
        regm = allregs;
        if (e.Eoper == OPconst)
        {       /* true:        OR SP,SP        (SP is never 0)         */
                /* false:       CMP SP,SP       (always equal)          */
                genregs(cdb, (boolres(e)) ? 0x09 : 0x39 , SP, SP);
                if (I64)
                    code_orrex(cdb.last(), REX_W);
        }
        else if (e.Eoper == OPvar &&
            (s = e.EV.Vsym).Sfl == FLreg &&
            s.Sregm & XMMREGS &&
            (tym == TYfloat || tym == TYifloat || tym == TYdouble || tym ==TYidouble))
        {
            tstresult(cdb,s.Sregm,e.Ety,true);
        }
        else if (sz <= REGSIZE)
        {
            if (!I16 && (tym == TYfloat || tym == TYifloat))
            {
                allocreg(cdb, &regm, &reg, TYoffset);       // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                cdb.gen2(0xD1,modregrmx(3,4,reg));          // SHL reg,1
            }
            else if (I64 && (tym == TYdouble || tym ==TYidouble))
            {
                allocreg(cdb, &regm, &reg, TYoffset);       // get a register
                loadea(cdb, e,&cs, 0x8B, reg, 0, 0, 0);     // MOV reg,data
                // remove sign bit, so that -0.0 == 0.0
                cdb.gen2(0xD1, modregrmx(3, 4, reg));       // SHL reg,1
                code_orrex(cdb.last(), REX_W);
            }
            else if (TARGET_OSX && e.Eoper == OPvar && movOnly(e))
            {
                allocreg(cdb, &regm, &reg, TYoffset);       // get a register
                loadea(cdb, e, &cs, 0x8B, reg, 0, 0, 0);    // MOV reg,data
                fixresult(cdb, e, regm, pretregs);
            }
            else
            {   cs.IFL2 = FLconst;
                cs.IEV2.Vsize_t = 0;
                op = (sz == 1) ? 0x80 : 0x81;
                loadea(cdb, e, &cs, op, 7, 0, 0, 0);        // CMP EA,0

                // Convert to TEST instruction if EA is a register
                // (to avoid register contention on Pentium)
                code *c = cdb.last();
                if ((c.Iop & ~1) == 0x38 &&
                    (c.Irm & modregrm(3, 0, 0)) == modregrm(3, 0, 0)
                   )
                {
                    c.Iop = (c.Iop & 1) | 0x84;
                    code_newreg(c, c.Irm & 7);
                    if (c.Irex & REX_B)
                        //c.Irex = (c.Irex & ~REX_B) | REX_R;
                        c.Irex |= REX_R;
                }
            }
        }
        else if (sz < 8)
        {
            allocreg(cdb, &regm, &reg, TYoffset);                // get a register
            if (I32)                                             // it's a 48 bit pointer
                loadea(cdb, e, &cs, 0x0FB7, reg, REGSIZE, 0, 0); // MOVZX reg,data+4
            else
            {
                loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, 0, 0);   // MOV reg,data+2
                if (tym == TYfloat || tym == TYifloat)           // dump sign bit
                    cdb.gen2(0xD1, modregrm(3, 4, reg));         // SHL reg,1
            }
            loadea(cdb,e,&cs,0x0B,reg,0,regm,0);                 // OR reg,data
        }
        else if (sz == 8 || (I64 && sz == 2 * REGSIZE && !tyfloating(tym)))
        {
            allocreg(cdb, &regm, &reg, TYoffset);           // get a register
            int i = sz - REGSIZE;
            loadea(cdb, e, &cs, 0x8B, reg, i, 0, 0);        // MOV reg,data+6
            if (tyfloating(tym))                            // TYdouble or TYdouble_alias
                cdb.gen2(0xD1, modregrm(3, 4, reg));        // SHL reg,1

            while ((i -= REGSIZE) >= 0)
            {
                loadea(cdb, e, &cs, 0x0B, reg, i, regm, 0); // OR reg,data+i
                code *c = cdb.last();
                if (i == 0)
                    c.Iflags |= CFpsw;                      // need the flags on last OR
            }
        }
        else if (sz == tysize(TYldouble))                   // TYldouble
            load87(cdb, e, 0, pretregs, null, -1);
        else
        {
            elem_print(e);
            assert(0);
        }
        return;
    }
    /* not for flags only */
    flags = *pretregs & mPSW;             /* save original                */
    forregs = *pretregs & (mBP | ALLREGS | mES | XMMREGS);
    if (*pretregs & mSTACK)
        forregs |= DOUBLEREGS;
    if (e.Eoper == OPconst)
    {
        targ_size_t value = e.EV.Vint;
        if (sz == 8)
            value = cast(targ_size_t)e.EV.Vullong;

        // Prefer a register that reghasvalue() reports as already holding the constant
        if (sz == REGSIZE && reghasvalue(forregs, value, &reg))
            forregs = mask(reg);

        regm_t save = regcon.immed.mval;
        allocreg(cdb, &forregs, &reg, tym);     // allocate registers
        regcon.immed.mval = save;               // KLUDGE!
        if (sz <= REGSIZE)
        {
            if (sz == 1)
                flags |= 1;
            else if (!I16 && sz == SHORTSIZE &&
                     !(mask(reg) & regcon.mvar) &&
                     !(config.flags4 & CFG4speed)
                    )
                flags |= 2;
            if (sz == 8)
                flags |= 64;
            if (isXMMreg(reg))
            {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                 * in the data segment, because they are x87 opcodes.
                 * Not so efficient. We should at least do a PXOR for 0.
                 */
                reg_t r;
                targ_size_t unsvalue = e.EV.Vuns;
                if (sz == 8)
                    unsvalue = cast(targ_size_t)e.EV.Vullong;
                regwithvalue(cdb,ALLREGS, unsvalue,&r,flags);
                flags = 0;                          // flags are already set
                cdb.genfltreg(0x89, r, 0);          // MOV floatreg,r
                if (sz == 8)
                    code_orrex(cdb.last(), REX_W);
                assert(sz == 4 || sz == 8);         // float or double
                const opmv = xmmload(tym);
                cdb.genxmmreg(opmv, reg, 0, tym);   // MOVSS/MOVSD XMMreg,floatreg
            }
            else
            {
                movregconst(cdb, reg, value, flags);
                flags = 0;                          // flags are already set
            }
        }
        else if (sz < 8)        // far pointers, longs for 16 bit targets
        {
            targ_int msw = I32 ? e.EV.Vseg
                        : (e.EV.Vulong >> 16);
            targ_int lsw = e.EV.Voff;
            regm_t mswflags = 0;
            if (forregs & mES)
            {
                movregconst(cdb, reg, msw, 0);      // MOV reg,segment
                genregs(cdb, 0x8E, 0, reg);         // MOV ES,reg
                msw = lsw;                          // MOV reg,offset
            }
            else
            {
                sreg = findreglsw(forregs);
                movregconst(cdb, sreg, lsw, 0);
                reg = findregmsw(forregs);
                /* Decide if we need to set flags when we load msw      */
                if (flags && (msw && msw|lsw || !(msw|lsw)))
                {   mswflags = mPSW;
                    flags = 0;
                }
            }
            movregconst(cdb, reg, msw, mswflags);
        }
        else if (sz == 8)
        {
            if (I32)
            {
                // View the 8 byte constant as two 32 bit halves
                targ_long *p = cast(targ_long *)cast(void*)&e.EV.Vdouble;
                if (isXMMreg(reg))
                {   /* This comes about because 0, 1, pi, etc., constants don't get stored
                     * in the data segment, because they are x87 opcodes.
                     * Not so efficient. We should at least do a PXOR for 0.
                     */
                    reg_t r;
                    regm_t rm = ALLREGS;
                    allocreg(cdb, &rm, &r, TYint);          // allocate scratch register
                    movregconst(cdb, r, p[0], 0);
                    cdb.genfltreg(0x89, r, 0);              // MOV floatreg,r
                    movregconst(cdb, r, p[1], 0);
                    cdb.genfltreg(0x89, r, 4);              // MOV floatreg+4,r

                    const opmv = xmmload(tym);
                    cdb.genxmmreg(opmv, reg, 0, tym);       // MOVSS/MOVSD XMMreg,floatreg
                }
                else
                {
                    movregconst(cdb, findreglsw(forregs) ,p[0], 0);
                    movregconst(cdb, findregmsw(forregs) ,p[1], 0);
                }
            }
            else
            {   targ_short *p = &e.EV.Vshort;  // point to start of Vdouble

                assert(reg == AX);
                movregconst(cdb, AX, p[3], 0);   // MOV AX,p[3]
                movregconst(cdb, DX, p[0], 0);
                movregconst(cdb, CX, p[1], 0);
                movregconst(cdb, BX, p[2], 0);
            }
        }
        else if (I64 && sz == 16)
        {
            movregconst(cdb, findreglsw(forregs), cast(targ_size_t)e.EV.Vcent.lsw, 64);
            movregconst(cdb, findregmsw(forregs), cast(targ_size_t)e.EV.Vcent.msw, 64);
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
    else
    {
        // See if we can use register that parameter was passed in
        if (regcon.params &&
            regParamInPreg(e.EV.Vsym) &&
            !anyiasm &&   // may have written to the memory for the parameter
            (regcon.params & mask(e.EV.Vsym.Spreg) && e.EV.Voffset == 0 ||
             regcon.params & mask(e.EV.Vsym.Spreg2) && e.EV.Voffset == REGSIZE) &&
            sz <= REGSIZE)                  // make sure no 'paint' to a larger size happened
        {
            reg = e.EV.Voffset ? e.EV.Vsym.Spreg2 : e.EV.Vsym.Spreg;
            forregs = mask(reg);

            if (debugr)
                printf("%s.%d is fastpar and using register %s\n",
                       e.EV.Vsym.Sident.ptr,
                       cast(int)e.EV.Voffset,
                       regm_str(forregs));

            mfuncreg &= ~forregs;
            regcon.used |= forregs;
            fixresult(cdb,e,forregs,pretregs);
            return;
        }

        allocreg(cdb, &forregs, &reg, tym);            // allocate registers

        if (sz == 1)
        {   regm_t nregm;

            debug
            if (!(forregs & BYTEREGS))
            {   elem_print(e);
                printf("forregs = %s\n", regm_str(forregs));
            }

            opcode_t opmv = 0x8A;                               // byte MOV
            static if (TARGET_OSX)
            {
                if (movOnly(e))
                    opmv = 0x8B;
            }
            assert(forregs & BYTEREGS);
            if (!I16)
            {
                if (config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed &&
                    // Workaround for OSX linker bug:
                    //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                    !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
                   )
                {
//                    opmv = tyuns(tym) ? 0x0FB6 : 0x0FBE;      // MOVZX/MOVSX
                }
                loadea(cdb, e, &cs, opmv, reg, 0, 0, 0);        // MOV regL,data
            }
            else
            {
                nregm = tyuns(tym) ? BYTEREGS : cast(regm_t) mAX;
                if (*pretregs & nregm)
                    nreg = reg;                                 // already allocated
                else
                    allocreg(cdb, &nregm, &nreg, tym);
                loadea(cdb, e, &cs, opmv, nreg, 0, 0, 0);       // MOV nregL,data
                if (reg != nreg)
                {
                    genmovreg(cdb, reg, nreg);                  // MOV reg,nreg
                    cssave(e, mask(nreg), false);
                }
            }
        }
        else if (forregs & XMMREGS)
        {
            // Can't load from registers directly to XMM regs
            //e.EV.Vsym.Sflags &= ~GTregcand;

            opcode_t opmv = xmmload(tym, xmmIsAligned(e));
            if (e.Eoper == OPvar)
            {
                Symbol *s = e.EV.Vsym;
                if (s.Sfl == FLreg && !(mask(s.Sreglsw) & XMMREGS))
                {   opmv = LODD;          // MOVD/MOVQ
                    /* getlvalue() will unwind this and unregister s; could use a better solution */
                }
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0); // MOVSS/MOVSD reg,data
            checkSetVex(cdb.last(),tym);
        }
        else if (sz <= REGSIZE)
        {
            opcode_t opmv = 0x8B;                     // MOV reg,data
            if (sz == 2 && !I16 && config.target_cpu >= TARGET_PentiumPro &&
                // Workaround for OSX linker bug:
                //   ld: GOT load reloc does not point to a movq instruction in test42 for x86_64
                !(config.exe & EX_OSX64 && !(sytab[e.EV.Vsym.Sclass] & SCSS))
               )
            {
//                opmv = tyuns(tym) ? 0x0FB7 : 0x0FBF;  // MOVZX/MOVSX
            }
            loadea(cdb, e, &cs, opmv, reg, 0, RMload, 0);
        }
        else if (sz <= 2 * REGSIZE && forregs & mES)
        {
            loadea(cdb, e, &cs, 0xC4, reg, 0, 0, mES);    // LES data
        }
        else if (sz <= 2 * REGSIZE)
        {
            if (I32 && sz == 8 &&
                (*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                assert(0);
    /+
                /* Note that we allocreg(DOUBLEREGS) needlessly */
                stackchanged = 1;
                int i = DOUBLESIZE - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
    +/
            }

            // Load the most significant part first, then the least significant part
            reg = findregmsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, REGSIZE, forregs, 0); // MOV reg,data+2
            if (I32 && sz == REGSIZE + 2)
                cdb.last().Iflags |= CFopsize;                   // seg is 16 bits
            reg = findreglsw(forregs);
            loadea(cdb, e, &cs, 0x8B, reg, 0, forregs, 0);       // MOV reg,data
        }
        else if (sz >= 8)
        {
            assert(!I32);
            if ((*pretregs & (mSTACK | mPSW)) == mSTACK)
            {
                // Note that we allocreg(DOUBLEREGS) needlessly
                stackchanged = 1;
                int i = sz - REGSIZE;
                do
                {
                    loadea(cdb,e,&cs,0xFF,6,i,0,0); // PUSH EA+i
                    cdb.genadjesp(REGSIZE);
                    stackpush += REGSIZE;
                    i -= REGSIZE;
                }
                while (i >= 0);
                return;
            }
            else
            {
                /* Each load passes, as its keep mask, the registers already
                 * filled by the preceding loads, so the mask grows by one
                 * register per step: AX, then AX|BX, then AX|BX|CX.
                 */
                assert(reg == AX);
                loadea(cdb, e, &cs, 0x8B, AX, 6, 0, 0);           // MOV AX,data+6
                loadea(cdb, e, &cs, 0x8B, BX, 4, mAX, 0);         // MOV BX,data+4
                loadea(cdb, e, &cs, 0x8B, CX, 2, mAX|mBX, 0);     // MOV CX,data+2
                loadea(cdb, e, &cs, 0x8B, DX, 0, mAX|mBX|mCX, 0); // MOV DX,data
            }
        }
        else
            assert(0);
        // Flags may already be set
        *pretregs &= flags | ~mPSW;
        fixresult(cdb, e, forregs, pretregs);
        return;
    }
}

}