/**
 * Top level code for the code generator.
 *
 * Copyright:   Copyright (C) 1985-1998 by Symantec
 *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cgcod.d, backend/cgcod.d)
 * Documentation:  https://dlang.org/phobos/dmd_backend_cgcod.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cgcod.d
 */

module dmd.backend.cgcod;

version = FRAMEPTR;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.cgcse;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.dlist;
import dmd.backend.dvec;
import dmd.backend.melf;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.outbuf;
import dmd.backend.rtlsym;
import dmd.backend.symtab;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

import dmd.backend.barray;

version (SCPP)
{
    import parser;
    import precomp;
}

extern (C++):

nothrow:

alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

version (MARS)
    enum MARS = true;
else
    enum MARS = false;

void dwarf_except_gentables(Funcsym *sfunc, uint startoffset, uint retoffset);
int REGSIZE();

/*********************************
 * Build a mask with a single bit set.
 * Params:
 *      m = bit position
 * Returns:
 *      1 shifted left by m
 */
private extern (D) uint mask(uint m)
{
    return 1 << m;
}


__gshared
{
bool floatreg;                  // !=0 if floating register is required

int hasframe;                   // !=0 if this function has a stack frame
bool enforcealign;              // enforced stack alignment
targ_size_t spoff;
targ_size_t Foff;               // BP offset of floating register
targ_size_t CSoff;              // offset of common sub expressions
targ_size_t NDPoff;             // offset of saved 8087 registers
targ_size_t pushoff;            // offset of saved registers
bool pushoffuse;                // using pushoff
int BPoff;                      // offset from BP
int EBPtoESP;                   // add to EBP offset to get ESP offset
LocalSection Para;              // section of function parameters
LocalSection Auto;              // section of automatics and registers
LocalSection Fast;              // section of fastpar
LocalSection EEStack;           // offset of SCstack variables from ESP
LocalSection Alloca;            // data for alloca() temporary

REGSAVE regsave;

CGstate cgstate;                // state of code generator

regm_t BYTEREGS = BYTEREGS_INIT;
regm_t ALLREGS = ALLREGS_INIT;


/************************************
 * # of bytes that SP is beyond BP.
 */

uint stackpush;

int stackchanged;               /* set to !=0 if any use of the stack
                                   other than accessing parameters. Used
                                   to see if we can address parameters
                                   with ESP rather than EBP.
116 */ 117 int refparam; // !=0 if we referenced any parameters 118 int reflocal; // !=0 if we referenced any locals 119 bool anyiasm; // !=0 if any inline assembler 120 char calledafunc; // !=0 if we called a function 121 char needframe; // if true, then we will need the frame 122 // pointer (BP for the 8088) 123 char gotref; // !=0 if the GOTsym was referenced 124 uint usednteh; // if !=0, then used NT exception handling 125 bool calledFinally; // true if called a BC_finally block 126 127 /* Register contents */ 128 con_t regcon; 129 130 int pass; // PASSxxxx 131 132 private Symbol *retsym; // set to symbol that should be placed in 133 // register AX 134 135 /**************************** 136 * Register masks. 137 */ 138 139 regm_t msavereg; // Mask of registers that we would like to save. 140 // they are temporaries (set by scodelem()) 141 regm_t mfuncreg; // Mask of registers preserved by a function 142 143 regm_t allregs; // ALLREGS optionally including mBP 144 145 int dfoidx; /* which block we are in */ 146 147 targ_size_t funcoffset; // offset of start of function 148 targ_size_t prolog_allocoffset; // offset past adj of stack allocation 149 targ_size_t startoffset; // size of function entry code 150 targ_size_t retoffset; /* offset from start of func to ret code */ 151 targ_size_t retsize; /* size of function return */ 152 153 private regm_t lastretregs,last2retregs,last3retregs,last4retregs,last5retregs; 154 155 } 156 157 /********************************* 158 * Generate code for a function. 159 * Note at the end of this routine mfuncreg will contain the mask 160 * of registers not affected by the function. Some minor optimization 161 * possibilities are here. 162 * Params: 163 * sfunc = function to generate code for 164 */ 165 166 void codgen(Symbol *sfunc) 167 { 168 bool flag; 169 block *btry; 170 171 // Register usage. If a bit is on, the corresponding register is live 172 // in that basic block. 
173 174 //printf("codgen('%s')\n",funcsym_p.Sident.ptr); 175 assert(sfunc == funcsym_p); 176 assert(cseg == funcsym_p.Sseg); 177 178 cgreg_init(); 179 CSE.initialize(); 180 tym_t functy = tybasic(sfunc.ty()); 181 cod3_initregs(); 182 allregs = ALLREGS; 183 pass = PASSinitial; 184 Alloca.init(); 185 anyiasm = 0; 186 187 if (config.ehmethod == EHmethod.EH_DWARF) 188 { 189 /* The dwarf unwinder relies on the function epilog to exist 190 */ 191 for (block* b = startblock; b; b = b.Bnext) 192 { 193 if (b.BC == BCexit) 194 b.BC = BCret; 195 } 196 } 197 198 tryagain: 199 debug 200 if (debugr) 201 printf("------------------ PASS%s -----------------\n", 202 (pass == PASSinitial) ? "init".ptr : ((pass == PASSreg) ? "reg".ptr : "final".ptr)); 203 204 lastretregs = last2retregs = last3retregs = last4retregs = last5retregs = 0; 205 206 // if no parameters, assume we don't need a stack frame 207 needframe = 0; 208 enforcealign = false; 209 gotref = 0; 210 stackchanged = 0; 211 stackpush = 0; 212 refparam = 0; 213 calledafunc = 0; 214 retsym = null; 215 216 cgstate.stackclean = 1; 217 cgstate.funcarg.init(); 218 cgstate.funcargtos = ~0; 219 cgstate.accessedTLS = false; 220 STACKALIGN = TARGET_STACKALIGN; 221 222 regsave.reset(); 223 memset(global87.stack.ptr,0,global87.stack.sizeof); 224 225 calledFinally = false; 226 usednteh = 0; 227 228 static if (MARS && TARGET_WINDOS) 229 { 230 if (sfunc.Sfunc.Fflags3 & Fjmonitor) 231 usednteh |= NTEHjmonitor; 232 } 233 else version (SCPP) 234 { 235 if (CPP) 236 { 237 if (config.exe == EX_WIN32 && 238 (sfunc.Stype.Tflags & TFemptyexc || sfunc.Stype.Texcspec)) 239 usednteh |= NTEHexcspec; 240 except_reset(); 241 } 242 } 243 244 // Set on a trial basis, turning it off if anything might throw 245 sfunc.Sfunc.Fflags3 |= Fnothrow; 246 247 floatreg = false; 248 assert(global87.stackused == 0); /* nobody in 8087 stack */ 249 250 CSE.start(); 251 memset(®con,0,regcon.sizeof); 252 regcon.cse.mval = regcon.cse.mops = 0; // no common subs yet 253 
msavereg = 0; 254 uint nretblocks = 0; 255 mfuncreg = fregsaved; // so we can see which are used 256 // (bit is cleared each time 257 // we use one) 258 for (block* b = startblock; b; b = b.Bnext) 259 { 260 memset(&b.Bregcon,0,b.Bregcon.sizeof); // Clear out values in registers 261 if (b.Belem) 262 resetEcomsub(b.Belem); // reset all the Ecomsubs 263 if (b.BC == BCasm) 264 anyiasm = 1; // we have inline assembler 265 if (b.BC == BCret || b.BC == BCretexp) 266 nretblocks++; 267 } 268 269 if (!config.fulltypes || (config.flags4 & CFG4optimized)) 270 { 271 regm_t noparams = 0; 272 for (int i = 0; i < globsym.length; i++) 273 { 274 Symbol *s = globsym[i]; 275 s.Sflags &= ~SFLread; 276 switch (s.Sclass) 277 { 278 case SCfastpar: 279 case SCshadowreg: 280 regcon.params |= s.Spregm(); 281 goto case SCparameter; 282 283 case SCparameter: 284 if (s.Sfl == FLreg) 285 noparams |= s.Sregm; 286 break; 287 288 default: 289 break; 290 } 291 } 292 regcon.params &= ~noparams; 293 } 294 295 if (config.flags4 & CFG4optimized) 296 { 297 if (nretblocks == 0 && // if no return blocks in function 298 !(sfunc.ty() & mTYnaked)) // naked functions may have hidden veys of returning 299 sfunc.Sflags |= SFLexit; // mark function as never returning 300 301 assert(dfo); 302 303 cgreg_reset(); 304 for (dfoidx = 0; dfoidx < dfo.length; dfoidx++) 305 { 306 regcon.used = msavereg | regcon.cse.mval; // registers already in use 307 block* b = dfo[dfoidx]; 308 blcodgen(b); // gen code in depth-first order 309 //printf("b.Bregcon.used = %s\n", regm_str(b.Bregcon.used)); 310 cgreg_used(dfoidx, b.Bregcon.used); // gather register used information 311 } 312 } 313 else 314 { 315 pass = PASSfinal; 316 for (block* b = startblock; b; b = b.Bnext) 317 blcodgen(b); // generate the code for each block 318 } 319 regcon.immed.mval = 0; 320 assert(!regcon.cse.mops); // should have all been used 321 322 // See which variables we can put into registers 323 if (pass != PASSfinal && 324 !anyiasm) // possible LEA or LES 
opcodes 325 { 326 allregs |= cod3_useBP(); // see if we can use EBP 327 328 // If pic code, but EBX was never needed 329 if (!(allregs & mask(PICREG)) && !gotref) 330 { 331 allregs |= mask(PICREG); // EBX can now be used 332 cgreg_assign(retsym); 333 pass = PASSreg; 334 } 335 else if (cgreg_assign(retsym)) // if we found some registers 336 pass = PASSreg; 337 else 338 pass = PASSfinal; 339 for (block* b = startblock; b; b = b.Bnext) 340 { 341 code_free(b.Bcode); 342 b.Bcode = null; 343 } 344 goto tryagain; 345 } 346 cgreg_term(); 347 348 version (SCPP) 349 { 350 if (CPP) 351 cgcod_eh(); 352 } 353 354 // See if we need to enforce a particular stack alignment 355 foreach (i; 0 .. globsym.length) 356 { 357 Symbol *s = globsym[i]; 358 359 if (Symbol_Sisdead(s, anyiasm)) 360 continue; 361 362 switch (s.Sclass) 363 { 364 case SCregister: 365 case SCauto: 366 case SCfastpar: 367 if (s.Sfl == FLreg) 368 break; 369 370 const sz = type_alignsize(s.Stype); 371 if (sz > STACKALIGN && (I64 || config.exe == EX_OSX)) 372 { 373 STACKALIGN = sz; 374 enforcealign = true; 375 } 376 break; 377 378 default: 379 break; 380 } 381 } 382 383 stackoffsets(1); // compute addresses of stack variables 384 cod5_prol_epi(); // see where to place prolog/epilog 385 CSE.finish(); // compute addresses and sizes of CSE saves 386 387 if (configv.addlinenumbers) 388 objmod.linnum(sfunc.Sfunc.Fstartline,sfunc.Sseg,Offset(sfunc.Sseg)); 389 390 // Otherwise, jmp's to startblock will execute the prolog again 391 assert(!startblock.Bpred); 392 393 CodeBuilder cdbprolog; cdbprolog.ctor(); 394 prolog(cdbprolog); // gen function start code 395 code *cprolog = cdbprolog.finish(); 396 if (cprolog) 397 pinholeopt(cprolog,null); // optimize 398 399 funcoffset = Offset(sfunc.Sseg); 400 targ_size_t coffset = Offset(sfunc.Sseg); 401 402 if (eecontext.EEelem) 403 genEEcode(); 404 405 for (block* b = startblock; b; b = b.Bnext) 406 { 407 // We couldn't do this before because localsize was unknown 408 switch (b.BC) 409 
{ 410 case BCret: 411 if (configv.addlinenumbers && b.Bsrcpos.Slinnum && !(sfunc.ty() & mTYnaked)) 412 { 413 CodeBuilder cdb; cdb.ctor(); 414 cdb.append(b.Bcode); 415 cdb.genlinnum(b.Bsrcpos); 416 b.Bcode = cdb.finish(); 417 } 418 goto case BCretexp; 419 420 case BCretexp: 421 epilog(b); 422 break; 423 424 default: 425 if (b.Bflags & BFLepilog) 426 epilog(b); 427 break; 428 } 429 assignaddr(b); // assign addresses 430 pinholeopt(b.Bcode,b); // do pinhole optimization 431 if (b.Bflags & BFLprolog) // do function prolog 432 { 433 startoffset = coffset + calcblksize(cprolog) - funcoffset; 434 b.Bcode = cat(cprolog,b.Bcode); 435 } 436 cgsched_block(b); 437 b.Bsize = calcblksize(b.Bcode); // calculate block size 438 if (b.Balign) 439 { 440 targ_size_t u = b.Balign - 1; 441 coffset = (coffset + u) & ~u; 442 } 443 b.Boffset = coffset; /* offset of this block */ 444 coffset += b.Bsize; /* offset of following block */ 445 } 446 447 debug 448 debugw && printf("code addr complete\n"); 449 450 // Do jump optimization 451 do 452 { 453 flag = false; 454 for (block* b = startblock; b; b = b.Bnext) 455 { 456 if (b.Bflags & BFLjmpoptdone) /* if no more jmp opts for this blk */ 457 continue; 458 int i = branch(b,0); // see if jmp => jmp short 459 if (i) // if any bytes saved 460 { targ_size_t offset; 461 462 b.Bsize -= i; 463 offset = b.Boffset + b.Bsize; 464 for (block* bn = b.Bnext; bn; bn = bn.Bnext) 465 { 466 if (bn.Balign) 467 { targ_size_t u = bn.Balign - 1; 468 469 offset = (offset + u) & ~u; 470 } 471 bn.Boffset = offset; 472 offset += bn.Bsize; 473 } 474 coffset = offset; 475 flag = true; 476 } 477 } 478 if (!I16 && !(config.flags4 & CFG4optimized)) 479 break; // use the long conditional jmps 480 } while (flag); // loop till no more bytes saved 481 482 debug 483 debugw && printf("code jump optimization complete\n"); 484 485 version (MARS) 486 { 487 if (usednteh & NTEH_try) 488 { 489 // Do this before code is emitted because we patch some instructions 490 nteh_filltables(); 
491 } 492 } 493 494 // Compute starting offset for switch tables 495 targ_size_t swoffset; 496 int jmpseg = -1; 497 if (config.flags & CFGromable) 498 { 499 jmpseg = 0; 500 swoffset = coffset; 501 } 502 503 // Emit the generated code 504 if (eecontext.EEcompile == 1) 505 { 506 codout(sfunc.Sseg,eecontext.EEcode); 507 code_free(eecontext.EEcode); 508 version (SCPP) 509 { 510 el_free(eecontext.EEelem); 511 } 512 } 513 else 514 { 515 for (block* b = startblock; b; b = b.Bnext) 516 { 517 if (b.BC == BCjmptab || b.BC == BCswitch) 518 { 519 if (jmpseg == -1) 520 { 521 jmpseg = objmod.jmpTableSegment(sfunc); 522 swoffset = Offset(jmpseg); 523 } 524 swoffset = _align(0,swoffset); 525 b.Btableoffset = swoffset; /* offset of sw tab */ 526 swoffset += b.Btablesize; 527 } 528 jmpaddr(b.Bcode); /* assign jump addresses */ 529 530 debug 531 if (debugc) 532 { 533 printf("Boffset = x%x, Bsize = x%x, Coffset = x%x\n", 534 cast(int)b.Boffset,cast(int)b.Bsize,cast(int)Offset(sfunc.Sseg)); 535 if (b.Bcode) 536 printf( "First opcode of block is: %0x\n", b.Bcode.Iop ); 537 } 538 539 if (b.Balign) 540 { uint u = b.Balign; 541 uint nalign = (u - cast(uint)Offset(sfunc.Sseg)) & (u - 1); 542 543 cod3_align_bytes(sfunc.Sseg, nalign); 544 } 545 assert(b.Boffset == Offset(sfunc.Sseg)); 546 547 version (SCPP) 548 { 549 if (CPP && !(config.exe == EX_WIN32)) 550 { 551 //printf("b = %p, index = %d\n",b,b.Bindex); 552 //except_index_set(b.Bindex); 553 554 if (btry != b.Btry) 555 { 556 btry = b.Btry; 557 except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset); 558 } 559 if (b.BC == BCtry) 560 { 561 btry = b; 562 except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset); 563 } 564 } 565 } 566 567 codout(sfunc.Sseg,b.Bcode); // output code 568 } 569 if (coffset != Offset(sfunc.Sseg)) 570 { 571 debug 572 printf("coffset = %d, Offset(sfunc.Sseg) = %d\n",cast(int)coffset,cast(int)Offset(sfunc.Sseg)); 573 574 assert(0); 575 } 576 sfunc.Ssize = Offset(sfunc.Sseg) - funcoffset; // size of function 577 578 
static if (NTEXCEPTIONS || MARS) 579 { 580 version (MARS) 581 const nteh = usednteh & NTEH_try; 582 else static if (NTEXCEPTIONS) 583 const nteh = usednteh & NTEHcpp; 584 else 585 enum nteh = true; 586 if (nteh) 587 { 588 assert(!(config.flags & CFGromable)); 589 //printf("framehandleroffset = x%x, coffset = x%x\n",framehandleroffset,coffset); 590 objmod.reftocodeseg(sfunc.Sseg,framehandleroffset,coffset); 591 } 592 } 593 594 // Write out switch tables 595 flag = false; // true if last active block was a ret 596 for (block* b = startblock; b; b = b.Bnext) 597 { 598 switch (b.BC) 599 { 600 case BCjmptab: /* if jump table */ 601 outjmptab(b); /* write out jump table */ 602 goto Ldefault; 603 604 case BCswitch: 605 outswitab(b); /* write out switch table */ 606 goto Ldefault; 607 608 case BCret: 609 case BCretexp: 610 /* Compute offset to return code from start of function */ 611 retoffset = b.Boffset + b.Bsize - retsize - funcoffset; 612 version (MARS) 613 { 614 /* Add 3 bytes to retoffset in case we have an exception 615 * handler. THIS PROBABLY NEEDS TO BE IN ANOTHER SPOT BUT 616 * IT FIXES THE PROBLEM HERE AS WELL. 
617 */ 618 if (usednteh & NTEH_try) 619 retoffset += 3; 620 } 621 flag = true; 622 break; 623 624 default: 625 Ldefault: 626 retoffset = b.Boffset + b.Bsize - funcoffset; 627 break; 628 } 629 } 630 if (configv.addlinenumbers && !(sfunc.ty() & mTYnaked)) 631 /* put line number at end of function on the 632 start of the last instruction 633 */ 634 /* Instead, try offset to cleanup code */ 635 if (retoffset < sfunc.Ssize) 636 objmod.linnum(sfunc.Sfunc.Fendline,sfunc.Sseg,funcoffset + retoffset); 637 638 static if (TARGET_WINDOS && MARS) 639 { 640 if (config.exe == EX_WIN64) 641 win64_pdata(sfunc); 642 } 643 644 static if (MARS) 645 { 646 if (usednteh & NTEH_try) 647 { 648 // Do this before code is emitted because we patch some instructions 649 nteh_gentables(sfunc); 650 } 651 if (usednteh & (EHtry | EHcleanup) && // saw BCtry or BC_try or OPddtor 652 config.ehmethod == EHmethod.EH_DM) 653 { 654 except_gentables(); 655 } 656 if (config.ehmethod == EHmethod.EH_DWARF) 657 { 658 sfunc.Sfunc.Fstartblock = startblock; 659 dwarf_except_gentables(sfunc, cast(uint)startoffset, cast(uint)retoffset); 660 sfunc.Sfunc.Fstartblock = null; 661 } 662 } 663 664 version (SCPP) 665 { 666 // Write out frame handler 667 if (NTEXCEPTIONS && usednteh & NTEHcpp) 668 { 669 nteh_framehandler(sfunc, except_gentables()); 670 } 671 else 672 { 673 if (NTEXCEPTIONS && usednteh & NTEH_try) 674 { 675 nteh_gentables(sfunc); 676 } 677 else 678 { 679 if (CPP) 680 except_gentables(); 681 } 682 } 683 } 684 685 for (block* b = startblock; b; b = b.Bnext) 686 { 687 code_free(b.Bcode); 688 b.Bcode = null; 689 } 690 } 691 692 // Mask of regs saved 693 // BUG: do interrupt functions save BP? 694 sfunc.Sregsaved = (functy == TYifunc) ? 
cast(regm_t) mBP : (mfuncreg | fregsaved); 695 696 debug 697 if (global87.stackused != 0) 698 printf("stackused = %d\n",global87.stackused); 699 700 assert(global87.stackused == 0); /* nobody in 8087 stack */ 701 702 global87.save.dtor(); // clean up ndp save array 703 } 704 705 /********************************************* 706 * Align sections on the stack. 707 * base negative offset of section from frame pointer 708 * alignment alignment to use 709 * bias difference between where frame pointer points and the STACKALIGNed 710 * part of the stack 711 * Returns: 712 * base revised downward so it is aligned 713 */ 714 targ_size_t alignsection(targ_size_t base, uint alignment, int bias) 715 { 716 assert(cast(int)base <= 0); 717 if (alignment > STACKALIGN) 718 alignment = STACKALIGN; 719 if (alignment) 720 { 721 int sz = cast(int)(-base + bias); 722 assert(sz >= 0); 723 sz &= (alignment - 1); 724 if (sz) 725 base -= alignment - sz; 726 } 727 return base; 728 } 729 730 /******************************* 731 * Generate code for a function start. 
732 * Input: 733 * Offset(cseg) address of start of code 734 * Auto.alignment 735 * Output: 736 * Offset(cseg) adjusted for size of code generated 737 * EBPtoESP 738 * hasframe 739 * BPoff 740 */ 741 void prolog(ref CodeBuilder cdb) 742 { 743 bool enter; 744 745 //printf("cod3.prolog() %s, needframe = %d, Auto.alignment = %d\n", funcsym_p.Sident.ptr, needframe, Auto.alignment); 746 debug debugw && printf("funcstart()\n"); 747 regcon.immed.mval = 0; /* no values in registers yet */ 748 version (FRAMEPTR) 749 EBPtoESP = 0; 750 else 751 EBPtoESP = -REGSIZE; 752 hasframe = 0; 753 bool pushds = false; 754 BPoff = 0; 755 bool pushalloc = false; 756 tym_t tyf = funcsym_p.ty(); 757 tym_t tym = tybasic(tyf); 758 const farfunc = tyfarfunc(tym) != 0; 759 760 // Special Intel 64 bit ABI prolog setup for variadic functions 761 Symbol *sv64 = null; // set to __va_argsave 762 if (I64 && variadic(funcsym_p.Stype)) 763 { 764 /* The Intel 64 bit ABI scheme. 765 * abi_sysV_amd64.pdf 766 * Load arguments passed in registers into the varargs save area 767 * so they can be accessed by va_arg(). 
768 */ 769 /* Look for __va_argsave 770 */ 771 for (SYMIDX si = 0; si < globsym.length; si++) 772 { 773 Symbol *s = globsym[si]; 774 if (s.Sident[0] == '_' && strcmp(s.Sident.ptr, "__va_argsave") == 0) 775 { 776 if (!(s.Sflags & SFLdead)) 777 sv64 = s; 778 break; 779 } 780 } 781 } 782 783 if (config.flags & CFGalwaysframe || 784 funcsym_p.Sfunc.Fflags3 & Ffakeeh || 785 /* The exception stack unwinding mechanism relies on the EBP chain being intact, 786 * so need frame if function can possibly throw 787 */ 788 !(config.exe == EX_WIN32) && !(funcsym_p.Sfunc.Fflags3 & Fnothrow) || 789 cgstate.accessedTLS || 790 sv64 791 ) 792 needframe = 1; 793 794 CodeBuilder cdbx; cdbx.ctor(); 795 796 Lagain: 797 spoff = 0; 798 char guessneedframe = needframe; 799 int cfa_offset = 0; 800 // if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_SOLARIS) && !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru))) 801 // usednteh |= NTEHpassthru; 802 803 /* Compute BP offsets for variables on stack. 804 * The organization is: 805 * Para.size parameters 806 * -------- stack is aligned to STACKALIGN 807 * seg of return addr (if far function) 808 * IP of return addr 809 * BP. caller's BP 810 * DS (if Windows prolog/epilog) 811 * exception handling context symbol 812 * Fast.size fastpar 813 * Auto.size autos and regs 814 * regsave.off any saved registers 815 * Foff floating register 816 * Alloca.size alloca temporary 817 * CSoff common subs 818 * NDPoff any 8087 saved registers 819 * monitor context record 820 * any saved registers 821 */ 822 823 if (tym == TYifunc) 824 Para.size = 26; // how is this number derived? 825 else 826 { 827 version (FRAMEPTR) 828 { 829 Para.size = ((farfunc ? 2 : 1) + needframe) * REGSIZE; 830 if (needframe) 831 EBPtoESP = -REGSIZE; 832 } 833 else 834 Para.size = ((farfunc ? 
2 : 1) + 1) * REGSIZE; 835 } 836 837 /* The real reason for the FAST section is because the implementation of contracts 838 * requires a consistent stack frame location for the 'this' pointer. But if varying 839 * stuff in Auto.offset causes different alignment for that section, the entire block can 840 * shift around, causing a crash in the contracts. 841 * Fortunately, the 'this' is always an SCfastpar, so we put the fastpar's in their 842 * own FAST section, which is never aligned at a size bigger than REGSIZE, and so 843 * its alignment never shifts around. 844 * But more work needs to be done, see Bugzilla 9200. Really, each section should be aligned 845 * individually rather than as a group. 846 */ 847 Fast.size = 0; 848 static if (NTEXCEPTIONS == 2) 849 { 850 Fast.size -= nteh_contextsym_size(); 851 version (MARS) 852 { 853 static if (TARGET_WINDOS) 854 { 855 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh && nteh_contextsym_size() == 0) 856 Fast.size -= 5 * 4; 857 } 858 } 859 } 860 861 /* Despite what the comment above says, aligning Fast section to size greater 862 * than REGSIZE does not break contract implementation. Fast.offset and 863 * Fast.alignment must be the same for the overriding and 864 * the overridden function, since they have the same parameters. Fast.size 865 * must be the same because otherwise, contract inheritance wouldn't work 866 * even if we didn't align Fast section to size greater than REGSIZE. Therefore, 867 * the only way aligning the section could cause problems with contract 868 * inheritance is if bias (declared below) differed for the overridden 869 * and the overriding function. 870 * 871 * Bias depends on Para.size and needframe. The value of Para.size depends on 872 * whether the function is an interrupt handler and whether it is a farfunc. 873 * DMD does not have _interrupt attribute and D does not make a distinction 874 * between near and far functions, so Para.size should always be 2 * REGSIZE 875 * for D. 
876 * 877 * The value of needframe depends on a global setting that is only set 878 * during backend's initialization and on function flag Ffakeeh. On Windows, 879 * that flag is always set for virtual functions, for which contracts are 880 * defined and on other platforms, it is never set. Because of that 881 * the value of neadframe should always be the same for the overridden 882 * and the overriding function, and so bias should be the same too. 883 */ 884 885 version (FRAMEPTR) 886 int bias = enforcealign ? 0 : cast(int)(Para.size); 887 else 888 int bias = enforcealign ? 0 : cast(int)(Para.size + (needframe ? 0 : REGSIZE)); 889 890 if (Fast.alignment < REGSIZE) 891 Fast.alignment = REGSIZE; 892 893 Fast.size = alignsection(Fast.size - Fast.offset, Fast.alignment, bias); 894 895 if (Auto.alignment < REGSIZE) 896 Auto.alignment = REGSIZE; // necessary because localsize must be REGSIZE aligned 897 Auto.size = alignsection(Fast.size - Auto.offset, Auto.alignment, bias); 898 899 regsave.off = alignsection(Auto.size - regsave.top, regsave.alignment, bias); 900 //printf("regsave.off = x%x, size = x%x, alignment = %x\n", 901 //cast(int)regsave.off, cast(int)(regsave.top), cast(int)regsave.alignment); 902 903 if (floatreg) 904 { 905 uint floatregsize = config.fpxmmregs || I32 ? 
16 : DOUBLESIZE; 906 Foff = alignsection(regsave.off - floatregsize, STACKALIGN, bias); 907 //printf("Foff = x%x, size = x%x\n", cast(int)Foff, cast(int)floatregsize); 908 } 909 else 910 Foff = regsave.off; 911 912 Alloca.alignment = REGSIZE; 913 Alloca.offset = alignsection(Foff - Alloca.size, Alloca.alignment, bias); 914 915 CSoff = alignsection(Alloca.offset - CSE.size(), CSE.alignment(), bias); 916 //printf("CSoff = x%x, size = x%x, alignment = %x\n", 917 //cast(int)CSoff, CSE.size(), cast(int)CSE.alignment); 918 919 NDPoff = alignsection(CSoff - global87.save.length * tysize(TYldouble), REGSIZE, bias); 920 921 regm_t topush = fregsaved & ~mfuncreg; // mask of registers that need saving 922 pushoffuse = false; 923 pushoff = NDPoff; 924 /* We don't keep track of all the pushes and pops in a function. Hence, 925 * using POP REG to restore registers in the epilog doesn't work, because the Dwarf unwinder 926 * won't be setting ESP correctly. With pushoffuse, the registers are restored 927 * from EBP, which is kept track of properly. 928 */ 929 if ((config.flags4 & CFG4speed || config.ehmethod == EHmethod.EH_DWARF) && (I32 || I64)) 930 { 931 /* Instead of pushing the registers onto the stack one by one, 932 * allocate space in the stack frame and copy/restore them there. 933 */ 934 int xmmtopush = numbitsset(topush & XMMREGS); // XMM regs take 16 bytes 935 int gptopush = numbitsset(topush) - xmmtopush; // general purpose registers to save 936 if (NDPoff || xmmtopush || cgstate.funcarg.size) 937 { 938 pushoff = alignsection(pushoff - (gptopush * REGSIZE + xmmtopush * 16), 939 xmmtopush ? STACKALIGN : REGSIZE, bias); 940 pushoffuse = true; // tell others we're using this strategy 941 } 942 } 943 944 //printf("Fast.size = x%x, Auto.size = x%x\n", (int)Fast.size, (int)Auto.size); 945 946 cgstate.funcarg.alignment = STACKALIGN; 947 /* If the function doesn't need the extra alignment, don't do it. 
948 * Can expand on this by allowing for locals that don't need extra alignment 949 * and calling functions that don't need it. 950 */ 951 if (pushoff == 0 && !calledafunc && config.fpxmmregs && (I32 || I64)) 952 { 953 cgstate.funcarg.alignment = I64 ? 8 : 4; 954 } 955 956 //printf("pushoff = %d, size = %d, alignment = %d, bias = %d\n", cast(int)pushoff, cast(int)cgstate.funcarg.size, cast(int)cgstate.funcarg.alignment, cast(int)bias); 957 cgstate.funcarg.offset = alignsection(pushoff - cgstate.funcarg.size, cgstate.funcarg.alignment, bias); 958 959 localsize = -cgstate.funcarg.offset; 960 961 //printf("Alloca.offset = x%llx, cstop = x%llx, CSoff = x%llx, NDPoff = x%llx, localsize = x%llx\n", 962 //(long long)Alloca.offset, (long long)CSE.size(), (long long)CSoff, (long long)NDPoff, (long long)localsize); 963 assert(cast(targ_ptrdiff_t)localsize >= 0); 964 965 // Keep the stack aligned by 8 for any subsequent function calls 966 if (!I16 && calledafunc && 967 (STACKALIGN >= 16 || config.flags4 & CFG4stackalign)) 968 { 969 int npush = numbitsset(topush); // number of registers that need saving 970 npush += numbitsset(topush & XMMREGS); // XMM regs take 16 bytes, so count them twice 971 if (pushoffuse) 972 npush = 0; 973 974 //printf("npush = %d Para.size = x%x needframe = %d localsize = x%x\n", 975 //npush, Para.size, needframe, localsize); 976 977 int sz = cast(int)(localsize + npush * REGSIZE); 978 if (!enforcealign) 979 { 980 version (FRAMEPTR) 981 sz += Para.size; 982 else 983 sz += Para.size + (needframe ? 
0 : -REGSIZE); 984 } 985 if (sz & (STACKALIGN - 1)) 986 localsize += STACKALIGN - (sz & (STACKALIGN - 1)); 987 } 988 cgstate.funcarg.offset = -localsize; 989 990 //printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x localsize x%02x\n", 991 //(int)Foff,(int)Auto.size,(int)NDPoff,(int)CSoff,(int)Para.size,(int)localsize); 992 993 uint xlocalsize = cast(uint)localsize; // amount to subtract from ESP to make room for locals 994 995 if (tyf & mTYnaked) // if no prolog/epilog for function 996 { 997 hasframe = 1; 998 return; 999 } 1000 1001 if (tym == TYifunc) 1002 { 1003 prolog_ifunc(cdbx,&tyf); 1004 hasframe = 1; 1005 cdb.append(cdbx); 1006 goto Lcont; 1007 } 1008 1009 /* Determine if we need BP set up */ 1010 if (enforcealign) 1011 { 1012 // we need BP to reset the stack before return 1013 // otherwise the return address is lost 1014 needframe = 1; 1015 1016 } 1017 else if (config.flags & CFGalwaysframe) 1018 needframe = 1; 1019 else 1020 { 1021 if (localsize) 1022 { 1023 if (I16 || 1024 !(config.flags4 & CFG4speed) || 1025 config.target_cpu < TARGET_Pentium || 1026 farfunc || 1027 config.flags & CFGstack || 1028 xlocalsize >= 0x1000 || 1029 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) || 1030 anyiasm || 1031 Alloca.size 1032 ) 1033 needframe = 1; 1034 } 1035 if (refparam && (anyiasm || I16)) 1036 needframe = 1; 1037 } 1038 1039 if (needframe) 1040 { 1041 assert(mfuncreg & mBP); // shouldn't have used mBP 1042 1043 if (!guessneedframe) // if guessed wrong 1044 goto Lagain; 1045 } 1046 1047 if (I16 && config.wflags & WFwindows && farfunc) 1048 { 1049 prolog_16bit_windows_farfunc(cdbx, &tyf, &pushds); 1050 enter = false; // don't use ENTER instruction 1051 hasframe = 1; // we have a stack frame 1052 } 1053 else if (needframe) // if variables or parameters 1054 { 1055 prolog_frame(cdbx, farfunc, xlocalsize, enter, cfa_offset); 1056 hasframe = 1; 1057 } 1058 1059 /* Align the stack if necessary */ 1060 
prolog_stackalign(cdbx); 1061 1062 /* Subtract from stack pointer the size of the local stack frame 1063 */ 1064 if (config.flags & CFGstack) // if stack overflow check 1065 { 1066 prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc); 1067 if (Alloca.size) 1068 prolog_setupalloca(cdbx); 1069 } 1070 else if (needframe) /* if variables or parameters */ 1071 { 1072 if (xlocalsize) /* if any stack offset */ 1073 { 1074 prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc); 1075 if (Alloca.size) 1076 prolog_setupalloca(cdbx); 1077 } 1078 else 1079 assert(Alloca.size == 0); 1080 } 1081 else if (xlocalsize) 1082 { 1083 assert(I32 || I64); 1084 prolog_frameadj2(cdbx, tyf, xlocalsize, &pushalloc); 1085 version (FRAMEPTR) { } else 1086 BPoff += REGSIZE; 1087 } 1088 else 1089 assert((localsize | Alloca.size) == 0 || (usednteh & NTEHjmonitor)); 1090 EBPtoESP += xlocalsize; 1091 if (hasframe) 1092 EBPtoESP += REGSIZE; 1093 1094 /* Win64 unwind needs the amount of code generated so far 1095 */ 1096 if (config.exe == EX_WIN64) 1097 { 1098 code *c = cdbx.peek(); 1099 pinholeopt(c, null); 1100 prolog_allocoffset = calcblksize(c); 1101 } 1102 1103 version (SCPP) 1104 { 1105 /* The idea is to generate trace for all functions if -Nc is not thrown. 1106 * If -Nc is thrown, generate trace only for global COMDATs, because those 1107 * are relevant to the FUNCTIONS statement in the linker .DEF file. 1108 * This same logic should be in epilog(). 1109 */ 1110 if (config.flags & CFGtrace && 1111 (!(config.flags4 & CFG4allcomdat) || 1112 funcsym_p.Sclass == SCcomdat || 1113 funcsym_p.Sclass == SCglobal || 1114 (config.flags2 & CFG2comdat && SymInline(funcsym_p)) 1115 ) 1116 ) 1117 { 1118 uint spalign = 0; 1119 int sz = cast(int)localsize; 1120 if (!enforcealign) 1121 { 1122 version (FRAMEPTR) 1123 sz += Para.size; 1124 else 1125 sz += Para.size + (needframe ? 
0 : -REGSIZE); 1126 } 1127 if (STACKALIGN >= 16 && (sz & (STACKALIGN - 1))) 1128 spalign = STACKALIGN - (sz & (STACKALIGN - 1)); 1129 1130 if (spalign) 1131 { /* This could be avoided by moving the function call to after the 1132 * registers are saved. But I don't remember why the call is here 1133 * and not there. 1134 */ 1135 cod3_stackadj(cdbx, spalign); 1136 } 1137 1138 uint regsaved; 1139 prolog_trace(cdbx, farfunc, ®saved); 1140 1141 if (spalign) 1142 cod3_stackadj(cdbx, -spalign); 1143 useregs((ALLREGS | mBP | mES) & ~regsaved); 1144 } 1145 } 1146 1147 version (MARS) 1148 { 1149 if (usednteh & NTEHjmonitor) 1150 { Symbol *sthis; 1151 1152 for (SYMIDX si = 0; 1; si++) 1153 { assert(si < globsym.length); 1154 sthis = globsym[si]; 1155 if (strcmp(sthis.Sident.ptr,"this".ptr) == 0) 1156 break; 1157 } 1158 nteh_monitor_prolog(cdbx,sthis); 1159 EBPtoESP += 3 * 4; 1160 } 1161 } 1162 1163 cdb.append(cdbx); 1164 prolog_saveregs(cdb, topush, cfa_offset); 1165 1166 Lcont: 1167 1168 if (config.exe == EX_WIN64) 1169 { 1170 if (variadic(funcsym_p.Stype)) 1171 prolog_gen_win64_varargs(cdb); 1172 regm_t namedargs; 1173 prolog_loadparams(cdb, tyf, pushalloc, namedargs); 1174 return; 1175 } 1176 1177 prolog_ifunc2(cdb, tyf, tym, pushds); 1178 1179 static if (NTEXCEPTIONS == 2) 1180 { 1181 if (usednteh & NTEH_except) 1182 nteh_setsp(cdb, 0x89); // MOV __context[EBP].esp,ESP 1183 } 1184 1185 // Load register parameters off of the stack. Do not use 1186 // assignaddr(), as it will replace the stack reference with 1187 // the register! 1188 regm_t namedargs; 1189 prolog_loadparams(cdb, tyf, pushalloc, namedargs); 1190 1191 if (sv64) 1192 prolog_genvarargs(cdb, sv64, namedargs); 1193 1194 /* Alignment checks 1195 */ 1196 //assert(Auto.alignment <= STACKALIGN); 1197 //assert(((Auto.size + Para.size + BPoff) & (Auto.alignment - 1)) == 0); 1198 } 1199 1200 /************************************ 1201 * Predicate for sorting auto symbols for qsort(). 
* Returns:
*      < 0     s1 goes farther from frame pointer
*      > 0     s1 goes nearer the frame pointer
*      = 0     no difference
*/

extern (C) int
autosort_cmp(scope const void *ps1, scope const void *ps2)
{
    // qsort() hands us pointers to the array slots, i.e. Symbol**
    Symbol *lhs = *cast(Symbol **)ps1;
    Symbol *rhs = *cast(Symbol **)ps2;

    /* Primary key: alignment. The symbol with the larger align size sorts
     * first (negative result), so it is allocated first and ends up
     * furthest away from the frame pointer.
     */
    const uint lhsAlign = Symbol_Salignsize(lhs);
    const uint rhsAlign = Symbol_Salignsize(rhs);
    if (lhsAlign != rhsAlign)
        return lhsAlign > rhsAlign ? -1 : 1;

    /* Secondary key: Sweight. Variables with higher Sweights are moved
     * nearer the frame pointer because the addressing mode is fewer bytes.
     * Grouping together high Sweight variables also may put them in the
     * same cache
     */
    if (lhs.Sweight != rhs.Sweight)
        return lhs.Sweight < rhs.Sweight ? -1 : 1;

    /* More:
     *  1. put static arrays nearest the frame pointer, so buffer overflows
     *     can't change other variable contents
     *  2. Do the coloring at the byte level to minimize stack usage
     */
    return 0;
}

/******************************
 * Compute offsets for remaining tmp, automatic and register variables
 * that did not make it into registers.
* Input:
*      flags   0: do estimate only
*              1: final
*
* Resets Para, Fast, Auto and EEStack, then walks globsym[] assigning
* Soffset to each symbol according to its storage class. When flags is
* non-zero and CFG4optimized is set, SCauto/SCregister symbols that are
* not in registers are collected first, sorted with autosort_cmp(), and
* may share a stack slot with an earlier symbol whose Srange is disjoint.
*/
void stackoffsets(int flags)
{
    //printf("stackoffsets() %s\n", funcsym_p.Sident);

    Para.init();        // parameter offset
    Fast.init();        // SCfastpar offset
    Auto.init();        // automatic & register offset
    EEStack.init();     // for SCstack's

    // Set if doing optimization of auto layout
    bool doAutoOpt = flags && config.flags4 & CFG4optimized;

    // Put autos in another array so we can do optimizations on the stack layout
    Symbol*[10] autotmp;
    Symbol **autos = null;
    if (doAutoOpt)
    {
        // Small symbol tables use the on-stack buffer, larger ones the heap
        if (globsym.length <= autotmp.length)
            autos = autotmp.ptr;
        else
        {   autos = cast(Symbol **)malloc(globsym.length * (*autos).sizeof);
            assert(autos);
        }
    }
    size_t autosi = 0;  // number used in autos[]

    for (int si = 0; si < globsym.length; si++)
    {   Symbol *s = globsym[si];

        /* Don't allocate space for dead or zero size parameters
         */
        switch (s.Sclass)
        {
            case SCfastpar:
                if (!(funcsym_p.Sfunc.Fflags3 & Ffakeeh))
                    goto Ldefault;   // don't need consistent stack frame
                break;

            case SCparameter:
                if (type_zeroSize(s.Stype, tybasic(funcsym_p.Stype.Tty)))
                {
                    Para.offset = _align(REGSIZE,Para.offset); // align on word stack boundary
                    s.Soffset = Para.offset;
                    continue;
                }
                break;          // allocate even if it's dead

            case SCshadowreg:
                break;          // allocate even if it's dead

            default:
            Ldefault:
                if (Symbol_Sisdead(s, anyiasm))
                    continue;       // don't allocate space
                break;
        }

        targ_size_t sz = type_size(s.Stype);
        if (sz == 0)
            sz++;               // can't handle 0 length structs

        uint alignsize = Symbol_Salignsize(s);
        if (alignsize > STACKALIGN)
            alignsize = STACKALIGN;         // no point if the stack is less aligned

        //printf("symbol '%s', size = x%lx, alignsize = %d, read = %x\n",s.Sident,(long)sz, (int)alignsize, s.Sflags & SFLread);
        assert(cast(int)sz >= 0);

        switch (s.Sclass)
        {
            case SCfastpar:
                /* Get these
                 * right next to the stack frame pointer, EBP.
                 * Needed so we can call nested contract functions
                 * frequire and fensure.
                 */
                if (s.Sfl == FLreg)        // if allocated in register
                    continue;
                /* Needed because storing fastpar's on the stack in prolog()
                 * does the entire register
                 */
                if (sz < REGSIZE)
                    sz = REGSIZE;

                Fast.offset = _align(sz,Fast.offset);
                s.Soffset = Fast.offset;
                Fast.offset += sz;
                //printf("fastpar '%s' sz = %d, fast offset = x%x, %p\n",s.Sident,(int)sz,(int)s.Soffset, s);

                if (alignsize > Fast.alignment)
                    Fast.alignment = alignsize;
                break;

            case SCregister:
            case SCauto:
                if (s.Sfl == FLreg)        // if allocated in register
                    break;

                if (doAutoOpt)
                {   autos[autosi++] = s;     // deal with later
                    break;
                }

                Auto.offset = _align(sz,Auto.offset);
                s.Soffset = Auto.offset;
                Auto.offset += sz;
                //printf("auto    '%s' sz = %d, auto offset =  x%lx\n",s.Sident,sz,(long)s.Soffset);

                if (alignsize > Auto.alignment)
                    Auto.alignment = alignsize;
                break;

            case SCstack:
                EEStack.offset = _align(sz,EEStack.offset);
                s.Soffset = EEStack.offset;
                //printf("EEStack.offset =  x%lx\n",(long)s.Soffset);
                EEStack.offset += sz;
                break;

            case SCshadowreg:
            case SCparameter:
                if (config.exe == EX_WIN64)
                {
                    // Win64: every parameter takes one 8 byte slot
                    assert((Para.offset & 7) == 0);
                    s.Soffset = Para.offset;
                    Para.offset += 8;
                    break;
                }
                /* Alignment on OSX 32 is odd. reals are 16 byte aligned in general,
                 * but are 4 byte aligned on the OSX 32 stack.
                 */
                Para.offset = _align(REGSIZE,Para.offset); /* align on word stack boundary */
                if (alignsize >= 16 &&
                    (I64 || (config.exe == EX_OSX &&
                         (tyaggregate(s.ty()) || tyvector(s.ty())))))
                    Para.offset = (Para.offset + (alignsize - 1)) & ~(alignsize - 1);
                s.Soffset = Para.offset;
                //printf("%s param offset =  x%lx, alignsize = %d\n",s.Sident,(long)s.Soffset, (int)alignsize);
                Para.offset += (s.Sflags & SFLdouble)
                            ? type_size(tstypes[TYdouble])   // float passed as double
                            : type_size(s.Stype);
                break;

            case SCpseudo:
            case SCstatic:
            case SCbprel:
                break;
            default:
                symbol_print(s);
                assert(0);
        }
    }

    if (autosi)
    {
        qsort(autos, autosi, (Symbol *).sizeof, &autosort_cmp);

        // tbl bit i is set when autos[i] owns a slot that later symbols may share
        vec_t tbl = vec_calloc(autosi);

        for (size_t si = 0; si < autosi; si++)
        {
            Symbol *s = autos[si];

            targ_size_t sz = type_size(s.Stype);
            if (sz == 0)
                sz++;               // can't handle 0 length structs

            uint alignsize = Symbol_Salignsize(s);
            if (alignsize > STACKALIGN)
                alignsize = STACKALIGN;         // no point if the stack is less aligned

            /* See if we can share storage with another variable
             * if their live ranges do not overlap.
             */
            if (// Don't share because could stomp on variables
                // used in finally blocks
                !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) &&
                s.Srange && !(s.Sflags & SFLspill))
            {
                for (size_t i = 0; i < si; i++)
                {
                    if (!vec_testbit(i,tbl))
                        continue;
                    Symbol *sp = autos[i];
                    //printf("auto    s = '%s', sp = '%s', %d, %d, %d\n",s.Sident,sp.Sident,dfo.length,vec_numbits(s.Srange),vec_numbits(sp.Srange));
                    if (vec_disjoint(s.Srange,sp.Srange) &&
                        !(sp.Soffset & (alignsize - 1)) &&
                        sz <= type_size(sp.Stype))
                    {
                        // Merge the ranges so a third symbol can't overlap either user
                        vec_or(sp.Srange,sp.Srange,s.Srange);
                        //printf("sharing space - '%s' onto '%s'\n",s.Sident,sp.Sident);
                        s.Soffset = sp.Soffset;
                        goto L2;
                    }
                }
            }
            Auto.offset = _align(sz,Auto.offset);
            s.Soffset = Auto.offset;
            //printf("auto    '%s' sz = %d, auto offset =  x%lx\n",s.Sident,sz,(long)s.Soffset);
            Auto.offset += sz;
            if (s.Srange && !(s.Sflags & SFLspill))
                vec_setbit(si,tbl);

            if (alignsize > Auto.alignment)
                Auto.alignment = alignsize;
        L2: { }
        }

        vec_free(tbl);
    }

    /* Release the heap buffer even when no auto was queued (autosi == 0);
     * freeing it only inside the block above leaked it in that case.
     * free(null) is a no-op, so the !doAutoOpt case needs no extra test.
     */
    if (autos != autotmp.ptr)
        free(autos);
}

/****************************
 * Generate code for a block.
 *
 * Seeds regcon.immed and regcon.params from the block's predecessors,
 * works out which register variables are live in this block, emits any
 * spill loads/stores and CSE saves, then calls outblkexitcode() and stores
 * the finished code in bl.Bcode. The resulting register state is recorded
 * in bl.Bregcon.
 * Params:
 *      bl = block to generate code for
 */

private void blcodgen(block *bl)
{
    regm_t mfuncregsave = mfuncreg;

    //dbg_printf("blcodgen(%p)\n",bl);

    /* Determine existing immediate values in registers by ANDing
        together the values from all the predecessors of bl.
     */
    assert(bl.Bregcon.immed.mval == 0);
    regcon.immed.mval = 0;      // assume no previous contents in registers
//    regcon.cse.mval = 0;
    foreach (bpl; ListRange(bl.Bpred))
    {
        block *bp = list_block(bpl);

        if (bpl == bl.Bpred)
        {   // first predecessor: start from its state
            regcon.immed = bp.Bregcon.immed;
            regcon.params = bp.Bregcon.params;
//          regcon.cse = bp.Bregcon.cse;
        }
        else
        {
            int i;

            regcon.params &= bp.Bregcon.params;
            if ((regcon.immed.mval &= bp.Bregcon.immed.mval) != 0)
                // Actual values must match, too
                for (i = 0; i < REGMAX; i++)
                {
                    if (regcon.immed.value[i] != bp.Bregcon.immed.value[i])
                        regcon.immed.mval &= ~mask(i);
                }
        }
    }
    regcon.cse.mops &= regcon.cse.mval;

    // Set regcon.mvar according to what variables are in registers for this block
    CodeBuilder cdb; cdb.ctor();
    regcon.mvar = 0;
    regcon.mpvar = 0;
    regcon.indexregs = 1;
    int anyspill = 0;           // 1 + index of last spilled symbol live in this block
    char *sflsave = null;       // saved Sfl of every symbol, restored below
    if (config.flags4 & CFG4optimized)
    {
        CodeBuilder cdbload; cdbload.ctor();
        CodeBuilder cdbstore; cdbstore.ctor();

        sflsave = cast(char *) alloca(globsym.length * char.sizeof);
        for (SYMIDX i = 0; i < globsym.length; i++)
        {
            Symbol *s = globsym[i];

            sflsave[i] = s.Sfl;
            if (regParamInPreg(s) &&
                regcon.params & s.Spregm() &&
                vec_testbit(dfoidx,s.Srange))
            {
//                regcon.used |= s.Spregm();
            }

            if (s.Sfl == FLreg)
            {
                if (vec_testbit(dfoidx,s.Srange))
                {
                    regcon.mvar |= s.Sregm;
                    if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
                        regcon.mpvar |= s.Sregm;
                }
            }
            else if (s.Sflags & SFLspill)
            {
                if (vec_testbit(dfoidx,s.Srange))
                {
                    anyspill = cast(int)(i + 1);
                    cgreg_spillreg_prolog(bl,s,cdbstore,cdbload);
                    if (vec_testbit(dfoidx,s.Slvreg))
                    {
                        // for this block the symbol is treated as a register variable
                        s.Sfl = FLreg;
                        regcon.mvar |= s.Sregm;
                        regcon.cse.mval &= ~s.Sregm;
                        regcon.immed.mval &= ~s.Sregm;
                        regcon.params &= ~s.Sregm;
                        if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
                            regcon.mpvar |= s.Sregm;
                    }
                }
            }
        }
        if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
        {
            cse_save(cdb,regcon.cse.mops & ~regcon.cse.mval);
        }
        cdb.append(cdbstore);
        cdb.append(cdbload);
        mfuncreg &= ~regcon.mvar;               // use these registers
        regcon.used |= regcon.mvar;

        // Determine if we have more than 1 uncommitted index register
        regcon.indexregs = IDXREGS & ~regcon.mvar;
        regcon.indexregs &= regcon.indexregs - 1;
    }

    /* This doesn't work when calling the BC_finally function,
     * as it is one block calling another.
     */
    //regsave.idx = 0;

    reflocal = 0;
    int refparamsave = refparam;
    refparam = 0;
    assert((regcon.cse.mops & regcon.cse.mval) == regcon.cse.mops);

    outblkexitcode(cdb, bl, anyspill, sflsave, &retsym, mfuncregsave);
    bl.Bcode = cdb.finish();

    for (int i = 0; i < anyspill; i++)
    {
        Symbol *s = globsym[i];
        s.Sfl = sflsave[i];     // undo block register assignments
    }

    if (reflocal)
        bl.Bflags |= BFLreflocal;
    if (refparam)
        bl.Bflags |= BFLrefparam;
    refparam |= refparamsave;   // accumulate across blocks
    bl.Bregcon.immed = regcon.immed;
    bl.Bregcon.cse = regcon.cse;
    bl.Bregcon.used = regcon.used;
    bl.Bregcon.params = regcon.params;

    debug
    debugw && printf("code gen complete\n");
}

/*****************************************
 * Add in exception handling code.
*/

version (SCPP)
{

private void cgcod_eh()
{
    /* Outline:
     *  1. Return at once unless usednteh has EHtry or EHcleanup set.
     *  2. Reset Bindex and the BFLvisited flag on every block.
     *  3. Walk the blocks in Bnext order, giving each a starting Bindex and
     *     an ending Bendindex, and processing the ESCAPE pseudo-ops found
     *     in its code.
     *  4. For EX_WIN32 only, prepend nteh_gensindex() code to any non-try
     *     block whose Bindex differs from a predecessor's Bendindex.
     */
    list_t stack;       // except_index_get() values saved at each ESCmark
    int idx;
    int tryidx;         // except_index_get() taken at the most recent BCtry block

    if (!(usednteh & (EHtry | EHcleanup)))
        return;

    // Compute Bindex for each block
    for (block *b = startblock; b; b = b.Bnext)
    {
        b.Bindex = -1;
        b.Bflags &= ~BFLvisited;               /* mark as unvisited */
    }
    block *btry = null;
    int lastidx = 0;
    startblock.Bindex = 0;
    for (block *b = startblock; b; b = b.Bnext)
    {
        if (btry == b.Btry && b.BC == BCcatch)  // if don't need to pop try block
        {
            block *br = list_block(b.Bpred);          // find corresponding try block
            assert(br.BC == BCtry);
            b.Bindex = br.Bindex;
        }
        else if (btry != b.Btry && b.BC != BCcatch ||
                 !(b.Bflags & BFLvisited))
            b.Bindex = lastidx;
        b.Bflags |= BFLvisited;

        debug
        if (debuge)
        {
            WRBC(b.BC);
            printf(" block (%p) Btry=%p Bindex=%d\n",b,b.Btry,b.Bindex);
        }

        except_index_set(b.Bindex);
        if (btry != b.Btry)                    // exited previous try block
        {
            except_pop(b,null,btry);
            btry = b.Btry;
        }
        if (b.BC == BCtry)
        {
            except_push(b,null,b);
            btry = b;
            tryidx = except_index_get();
            // put the index-setting code ahead of the block's existing code
            CodeBuilder cdb; cdb.ctor();
            nteh_gensindex(cdb,tryidx - 1);
            cdb.append(b.Bcode);
            b.Bcode = cdb.finish();
        }

        stack = null;
        for (code *c = b.Bcode; c; c = code_next(c))
        {
            if ((c.Iop & ESCAPEmask) == ESCAPE)
            {
                code *c1 = null;
                switch (c.Iop & 0xFFFF00)
                {
                    case ESCctor:
                        //printf("ESCctor\n");
                        except_push(c,c.IEV1.Vtor,null);
                        goto L1;

                    case ESCdtor:
                        //printf("ESCdtor\n");
                        except_pop(c,c.IEV1.Vtor,null);
                    L1: if (config.exe == EX_WIN32)
                        {
                            // splice the generated code in right after c
                            CodeBuilder cdb; cdb.ctor();
                            nteh_gensindex(cdb,except_index_get() - 1);
                            c1 = cdb.finish();
                            c1.next = code_next(c);
                            c.next = c1;
                        }
                        break;

                    case ESCmark:
                        //printf("ESCmark\n");
                        idx = except_index_get();
                        list_prependdata(&stack,idx);
                        except_mark();
                        break;

                    case ESCrelease:
                        //printf("ESCrelease\n");
                        version (SCPP)
                        {
                            idx = list_data(stack);
                            list_pop(&stack);
                            if (idx != except_index_get())
                            {
                                if (config.exe == EX_WIN32)
                                {
                                    CodeBuilder cdb; cdb.ctor();
                                    nteh_gensindex(cdb,idx - 1);
                                    c1 = cdb.finish();
                                    c1.next = code_next(c);
                                    c.next = c1;
                                }
                                else
                                {   except_pair_append(c,idx - 1);
                                    c.Iop = ESCAPE | ESCoffset;
                                }
                            }
                            except_release();
                        }
                        break;

                    case ESCmark2:
                        //printf("ESCmark2\n");
                        except_mark();
                        break;

                    case ESCrelease2:
                        //printf("ESCrelease2\n");
                        version (SCPP)
                        {
                            except_release();
                        }
                        break;

                    default:
                        break;
                }
            }
        }
        assert(stack == null);          // every ESCmark must have been matched by an ESCrelease
        b.Bendindex = except_index_get();

        if (b.BC != BCret && b.BC != BCretexp)
            lastidx = b.Bendindex;

        // Set starting index for each of the successors
        int i = 0;
        foreach (bl; ListRange(b.Bsucc))
        {
            block *bs = list_block(bl);
            if (b.BC == BCtry)
            {
                switch (i)
                {
                    case 0:                             // block after catches
                        bs.Bindex = b.Bendindex;
                        break;

                    case 1:                             // 1st catch block
                        bs.Bindex = tryidx;
                        break;

                    default:                            // subsequent catch blocks
                        bs.Bindex = b.Bindex;
                        break;
                }

                debug
                if (debuge)
                {
                    printf(" 1setting %p to %d\n",bs,bs.Bindex);
                }
            }
            else if (!(bs.Bflags & BFLvisited))
            {
                bs.Bindex = b.Bendindex;

                debug
                if (debuge)
                {
                    printf(" 2setting %p to %d\n",bs,bs.Bindex);
                }
            }
            bs.Bflags |= BFLvisited;
            i++;
        }
    }

    if (config.exe == EX_WIN32)
        for (block *b = startblock; b; b = b.Bnext)
        {
            if (/*!b.Bcount ||*/ b.BC == BCtry)
                continue;
            foreach (bl; ListRange(b.Bpred))
            {
                int pi = list_block(bl).Bendindex;
                if (b.Bindex != pi)
                {
                    // one mismatching predecessor is enough; emit once and stop
                    CodeBuilder cdb; cdb.ctor();
                    nteh_gensindex(cdb,b.Bindex - 1);
                    cdb.append(b.Bcode);
                    b.Bcode = cdb.finish();
                    break;
                }
            }
        }
}

}

/******************************
 * Count the number of bits set in a register mask.
 * Params:
 *      regm = register mask
 * Returns:
 *      number of 1 bits in regm (0 if regm is 0)
 */

int numbitsset(regm_t regm)
{
    int n = 0;
    if (regm)
        do
            n++;
        while ((regm &= regm - 1) != 0);        // each pass clears the lowest set bit
    return n;
}

/******************************
 * Given a register mask, find and return the number
 * of the first register that fits.
 * This overload forwards to the one below, passing this file's
 * __LINE__ and __FILE__ for the diagnostic.
 */

reg_t findreg(regm_t regm)
{
    return findreg(regm, __LINE__, __FILE__);
}

/******************************
 * Find the lowest numbered register present in regm.
 * Params:
 *      regm = register mask; must not be 0
 *      line = line number reported in the debug diagnostic
 *      file = file name reported in the debug diagnostic
 * Returns:
 *      bit index of the lowest set bit of regm.
 *      If regm is 0 the function does not return: it ends in assert(0).
 */
reg_t findreg(regm_t regm, int line, const(char)* file)
{
    debug
    regm_t regmsave = regm;

    reg_t i = 0;
    while (1)
    {
        if (!(regm & 0xF))              // low four bits clear: skip them in one step
        {
            regm >>= 4;
            i += 4;
            if (!regm)
                break;                  // no bits set at all
        }
        if (regm & 1)
            return i;
        regm >>= 1;
        i++;
    }

    debug
    printf("findreg(%s, line=%d, file='%s', function = '%s')\n",regm_str(regmsave),line,file,funcsym_p.Sident.ptr);
    fflush(stdout);

//    *(char*)0=0;
    assert(0);
}

/***************
 * Free element (but not its leaves! (assume they are already freed))
 * Don't decrement Ecount! This is so we can detect if the common subexp
 * has already been evaluated.
 * If common subexpression is not required anymore, eliminate
 * references to it.
*/

void freenode(elem *e)
{
    elem_debug(e);
    //dbg_printf("freenode(%p) : comsub = %d, count = %d\n",e,e.Ecomsub,e.Ecount);

    // Ecomsub is the usage count: it is decremented on every call, and
    // only when it was already zero is there anything further to do
    if (e.Ecomsub--)
        return;

    if (!e.Ecount)                      // it never was a CSE
        return;

    // Forget every register that is recorded as holding this CSE
    foreach (i, held; regcon.cse.value)
    {
        if (held != e)
            continue;
        const bit = mask(cast(uint)i);
        regcon.cse.mval &= ~bit;
        regcon.cse.mops &= ~bit;        /* free masks                   */
    }
    CSE.remove(e);
}

/*********************************
 * Reset Ecomsub for all elem nodes, i.e. reverse the effects of freenode().
 * Each node's Ecomsub is set back to its Ecount. The E2 subtree of a binary
 * node is handled by recursion, the E1 chain by iteration.
 */

private void resetEcomsub(elem *e)
{
    for (;;)
    {
        elem_debug(e);
        e.Ecomsub = e.Ecount;

        const oper = e.Eoper;
        if (OTleaf(oper))
            return;                     // bottom of this chain

        if (OTbinary(oper))
            resetEcomsub(e.EV.E2);
        e = e.EV.E1;
    }
}

/*********************************
 * Determine if elem e is a register variable.
 * Only OPvar and OPrelconst elems whose symbol has Sfl of FLreg or
 * FLpseudo can qualify. preg and pregm may each be null, in which case
 * that result is not stored.
 * If so:
 *      *pregm = mask of registers that make up the variable
 *      *preg = the least significant register
 *      returns true
 * Else
 *      returns false
 */

int isregvar(elem *e,regm_t *pregm,reg_t *preg)
{
    elem_debug(e);
    if (e.Eoper != OPvar && e.Eoper != OPrelconst)
        return false;

    Symbol *s = e.EV.Vsym;
    regm_t regm;
    reg_t reg;

    switch (s.Sfl)
    {
        case FLreg:
            if (s.Sclass == SCparameter)
            {
                refparam = true;
                reflocal = true;
            }
            // an offset of REGSIZE selects the most significant register
            reg = e.EV.Voffset == REGSIZE ? s.Sregmsw : s.Sreglsw;
            regm = s.Sregm;
            //assert(tyreg(s.ty()));
static if (0)
{
            // Let's just see if there is a CSE in a reg we can use
            // instead. This helps avoid AGI's.
            if (e.Ecount && e.Ecount != e.Ecomsub)
            {   int i;

                for (i = 0; i < arraysize(regcon.cse.value); i++)
                {
                    if (regcon.cse.value[i] == e)
                    {   reg = i;
                        break;
                    }
                }
            }
}
            assert(regm & regcon.mvar && !(regm & ~regcon.mvar));
            break;

        case FLpseudo:
        {
            const uint u = s.Sreglsw;
            version (MARS)
                const regm_t m = mask(u);
            else
                const regm_t m = pseudomask[u];

            // reject BP,SP,EBP,ESP, and ?H
            if (!(m & ALLREGS) || (u & ~3) == 4)
                return false;

            version (MARS)
                reg = u & 7;
            else
                reg = pseudoreg[u] & 7;
            regm = m;
            break;
        }

        default:
            return false;
    }

    // Hand back whichever results the caller asked for
    if (preg)
        *preg = reg;
    if (pregm)
        *pregm = regm;
    return true;
}

/*********************************
 * Allocate some registers.
 * Input:
 *      pretregs        Pointer to mask of registers to make selection from.
 *      tym             Mask of type we will store in registers.
 * Output:
 *      *pretregs       Mask of allocated registers.
 *      *preg           Register number of first allocated register.
 *      msavereg,mfuncreg       retregs bits are cleared.
 *      regcon.cse.mval,regcon.cse.mops updated
 *      cdb             receives any code generated to save regcon.cse.mops on the
 *                      stack.
*/

void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym)
{
    // forward to the full version, tagging the call with this file's location
    allocreg(cdb, pretregs, preg, tym, __LINE__, __FILE__);
}

/*********************************
 * Worker for allocreg().
 * Params:
 *      cdb = where any code needed to free the chosen registers is written
 *      pretregs = in: mask of candidate registers; out: mask actually allocated
 *      preg = out: register number of the first allocated register
 *      tym = type to be stored; only tybasic(tym) is used
 *      line = caller's line number, used in diagnostics only
 *      file = caller's file name, used in diagnostics only
 */
void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym
        ,int line,const(char)* file)
{
        reg_t reg;

static if (0)
{
        if (pass == PASSfinal)
        {
            printf("allocreg %s,%d: regcon.mvar %s regcon.cse.mval %s msavereg %s *pretregs %s tym ",
                file,line,regm_str(regcon.mvar),regm_str(regcon.cse.mval),
                regm_str(msavereg),regm_str(*pretregs));
            WRTYxx(tym);
            dbg_printf("\n");
        }
}
        tym = tybasic(tym);
        uint size = _tysize[tym];
        *pretregs &= mES | allregs | XMMREGS;
        regm_t retregs = *pretregs;

        debug if (retregs == 0)
            printf("allocreg: file %s(%d)\n", file, line);

        if ((retregs & regcon.mvar) == retregs) // if exactly in reg vars
        {
            if (size <= REGSIZE || (retregs & XMMREGS))
            {
                *preg = findreg(retregs);
                assert(retregs == mask(*preg)); /* no more bits are set */
            }
            else if (size <= 2 * REGSIZE)
            {
                *preg = findregmsw(retregs);
                assert(retregs & mLSW);
            }
            else
                assert(0);
            getregs(cdb,retregs);
            return;
        }
        int count = 0;          // number of times the selection has been restarted
L1:
        //printf("L1: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        assert(++count < 20);           /* fail instead of hanging if blocked */
        assert(retregs);
        reg_t msreg = NOREG, lsreg = NOREG;  /* no value assigned yet        */
L3:
        //printf("L2: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        /* Progressively relax the exclusions until at least one candidate
         * is left: first drop regcon.params, then swap cse.mval for
         * cse.mops, then drop CSEs altogether, then drop msavereg too.
         */
        regm_t r = retregs & ~(msavereg | regcon.cse.mval | regcon.params);
        if (!r)
        {
            r = retregs & ~(msavereg | regcon.cse.mval);
            if (!r)
            {
                r = retregs & ~(msavereg | regcon.cse.mops);
                if (!r)
                {   r = retregs & ~msavereg;
                    if (!r)
                        r = retregs;
                }
            }
        }

        if (size <= REGSIZE || retregs & XMMREGS)
        {
            if (r & ~mBP)
                r &= ~mBP;

            // If only one index register, prefer to not use LSW registers
            if (!regcon.indexregs && r & ~mLSW)
                r &= ~mLSW;

            if (pass == PASSfinal && r & ~lastretregs && !I16)
            {   // Try not to always allocate the same register,
                // to schedule better

                r &= ~lastretregs;
                if (r & ~last2retregs)
                {
                    r &= ~last2retregs;
                    if (r & ~last3retregs)
                    {
                        r &= ~last3retregs;
                        if (r & ~last4retregs)
                        {
                            r &= ~last4retregs;
//                          if (r & ~last5retregs)
//                              r &= ~last5retregs;
                        }
                    }
                }
                if (r & ~mfuncreg)
                    r &= ~mfuncreg;
            }
            reg = findreg(r);
            retregs = mask(reg);
        }
        else if (size <= 2 * REGSIZE)
        {
            /* Select pair with both regs free. Failing */
            /* that, select pair with one reg free.             */

            if (r & mBP)
            {
                retregs &= ~mBP;
                goto L3;
            }

            if (r & mMSW)
            {
                if (r & mDX)
                    msreg = DX;                 /* prefer to use DX over CX */
                else
                    msreg = findregmsw(r);
                r &= mLSW;                      /* see if there's an LSW also */
                if (r)
                    lsreg = findreg(r);
                else if (lsreg == NOREG)   /* if don't have LSW yet */
                {
                    retregs &= mLSW;
                    goto L3;
                }
            }
            else
            {
                if (I64 && !(r & mLSW))
                {
                    retregs = *pretregs & (mMSW | mLSW);
                    assert(retregs);
                    goto L1;
                }
                lsreg = findreglsw(r);
                if (msreg == NOREG)
                {
                    retregs &= mMSW;
                    assert(retregs);
                    goto L3;
                }
            }
            reg = (msreg == ES) ? lsreg : msreg;
            retregs = mask(msreg) | mask(lsreg);
        }
        else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
        {
            debug
            if (retregs != DOUBLEREGS)
                printf("retregs = %s, *pretregs = %s\n", regm_str(retregs), regm_str(*pretregs));

            assert(retregs == DOUBLEREGS);
            reg = AX;
        }
        else
        {
            debug
            {
                WRTYxx(tym);
                printf("\nallocreg: fil %s lin %d, regcon.mvar %s msavereg %s *pretregs %s, reg %d, tym x%x\n",
                    file,line,regm_str(regcon.mvar),regm_str(msavereg),regm_str(*pretregs),*preg,tym);
            }
            assert(0);
        }
        if (retregs & regcon.mvar)              // if conflict with reg vars
        {
            if (!(size > REGSIZE && *pretregs == (mAX | mDX)))
            {
                retregs = (*pretregs &= ~(retregs & regcon.mvar));
                goto L1;                // try other registers
            }
        }
        *preg = reg;
        *pretregs = retregs;

        //printf("Allocating %s\n",regm_str(retregs));
        // shift the history of recent allocations down by one
        last5retregs = last4retregs;
        last4retregs = last3retregs;
        last3retregs = last2retregs;
        last2retregs = lastretregs;
        lastretregs = retregs;
        getregs(cdb, retregs);
}


/*****************************************
 * Allocate a scratch register.
 * Params:
 *      cdb = where to write any generated code to
 *      regm = mask of registers to pick one from
 * Returns:
 *      selected register
 */
reg_t allocScratchReg(ref CodeBuilder cdb, regm_t regm)
{
    reg_t r;
    // regm is a by-value copy, so letting allocreg() narrow it is harmless
    allocreg(cdb, &regm, &r, TYoffset);
    return r;
}


/******************************
 * Determine registers that should be destroyed upon arrival
 * to code entry point for exception handling.
 * Returns:
 *      for EH_DWARF, the registers in allregs that are not in mfuncreg;
 *      otherwise allregs for 32/64 bit code, else ALLREGS | mES
 */
regm_t lpadregs()
{
    regm_t used;
    if (config.ehmethod == EHmethod.EH_DWARF)
        used = allregs & ~mfuncreg;
    else
        used = (I32 | I64) ? allregs : (ALLREGS | mES);
    //printf("lpadregs(): used=%s, allregs=%s, mfuncreg=%s\n", regm_str(used), regm_str(allregs), regm_str(mfuncreg));
    return used;
}


/*************************
 * Mark registers as used.
 * Params:
 *      regm = mask of registers being used
 */

void useregs(regm_t regm)
{
    //printf("useregs(x%x) %s\n", regm, regm_str(regm));
    mfuncreg &= ~regm;
    regcon.used |= regm;                // registers used in this block
    regcon.params &= ~regm;
    if (regm & regcon.mpvar)            // if modified a fastpar register variable
        regcon.params = 0;              // toss them all out
}

/*************************
 * We are going to use the registers in mask r.
 * Generate any code necessary to save any regs.
 * Params:
 *      cdb = where the code saving CSEs is written
 *      r = mask of registers about to be overwritten
 */

void getregs(ref CodeBuilder cdb, regm_t r)
{
    //printf("getregs(x%x) %s\n", r, regm_str(r));
    regm_t ms = r & regcon.cse.mops;           // mask of common subs we must save
    useregs(r);
    regcon.cse.mval &= ~r;
    msavereg &= ~r;                     // regs that are destroyed
    regcon.immed.mval &= ~r;
    if (ms)
        cse_save(cdb, ms);
}

/*************************
 * We are going to use the registers in mask r.
 * Same as getregs(), but assert if code is needed to be generated.
 * Params:
 *      r = mask of registers about to be overwritten; must not overlap
 *          regcon.cse.mops
 */
void getregsNoSave(regm_t r)
{
    //printf("getregsNoSave(x%x) %s\n", r, regm_str(r));
    assert(!(r & regcon.cse.mops));            // mask of common subs we must save
    useregs(r);
    regcon.cse.mval &= ~r;
    msavereg &= ~r;                     // regs that are destroyed
    regcon.immed.mval &= ~r;
}

/*****************************************
 * Copy registers in cse.mops into memory.
*/

private void cse_save(ref CodeBuilder cdb, regm_t ms)
{
    assert((ms & regcon.cse.mops) == ms);
    regcon.cse.mops &= ~ms;

    /* Skip CSEs that are already saved */
    for (regm_t regm = 1; regm < mask(NUMREGS); regm <<= 1)
    {
        if (!(regm & ms))
            continue;

        const e = regcon.cse.value[findreg(regm)];
        const sz = tysize(e.Ety);
        foreach (const ref cse; CSE.filter(e))
        {
            /* An existing entry covers this register when the value fits
             * one register, or it is a pair and the same half is recorded,
             * or it is 4 registers wide and the masks are identical.
             */
            const bool alreadySaved =
                sz <= REGSIZE ||
                (sz <= 2 * REGSIZE &&
                 (((regm & mMSW) && (cse.regm & mMSW)) ||
                  ((regm & mLSW) && (cse.regm & mLSW)))) ||
                (sz == 4 * REGSIZE && regm == cse.regm);
            if (!alreadySaved)
                continue;

            ms &= ~regm;
            if (!ms)
                return;         // nothing left to save
            break;
        }
    }

    // Whatever is still set in ms needs a new CSE entry
    while (ms)
    {
        auto entry = CSE.add();
        reg_t reg = findreg(ms);        /* the register to save         */
        const regbit = mask(reg);
        entry.e = regcon.cse.value[reg];
        entry.regm = regbit;

        ms &= ~regbit;                  /* turn off reg bit in ms       */

        if (!cse_simple(&entry.csimple, entry.e))
        {
            // has to be stored to its slot
            CSE.updateSizeAndAlign(entry.e);
            gen_storecse(cdb, entry.e.Ety, reg, entry.slot);
            reflocal = true;
        }
        else
        {
            // If we can simply reload the CSE, we don't need to save it
            entry.flags |= CSEsimple;
        }
    }
}

/******************************************
 * Getregs without marking immediate register values as gone.
 * regcon.immed.mval is captured before the getregs() call and put back
 * afterwards.
 */

void getregs_imm(ref CodeBuilder cdb, regm_t r)
{
    const regm_t immedBefore = regcon.immed.mval;
    getregs(cdb,r);
    regcon.immed.mval = immedBefore;
}

/******************************************
 * Flush all CSE's out of registers and into memory.
* Input:
*      do87    !=0 means save 87 registers too
*/

void cse_flush(ref CodeBuilder cdb, int do87)
{
    //dbg_printf("cse_flush()\n");
    cse_save(cdb,regcon.cse.mops);      // save any CSEs to memory
    if (do87 != 0)
        save87(cdb);    // save any 8087 temporaries
}

/*************************
 * Common subexpressions exist in registers. Note this in regcon.cse.mval.
 * Input:
 *      e       the subexpression
 *      regm    mask of registers holding it
 *      opsflag if != 0 then regcon.cse.mops gets set too
 * Returns:
 *      false   not saved as a CSE
 *      true    saved as a CSE
 */

bool cssave(elem *e,regm_t regm,uint opsflag)
{
    /*if (e.Ecount && e.Ecount == e.Ecomsub)*/
    if (!e.Ecount || !e.Ecomsub)
        return false;

    if (!opsflag && pass != PASSfinal && (I32 || I64))
        return false;

    //printf("cssave(e = %p, regm = %s, opsflag = x%x)\n", e, regm_str(regm), opsflag);
    regm &= mBP | ALLREGS | mES | XMMREGS;    /* just to be sure              */

/+
    /* Do not register CSEs if they are register variables and      */
    /* are not operator nodes. This forces the register allocation  */
    /* to go through allocreg(), which will prevent using register  */
    /* variables for scratch.                                       */
    if (opsflag || !(regm & regcon.mvar))
+/
    bool saved = false;
    for (uint i = 0; regm; i++)
    {
        const regm_t bit = mask(i);
        if (!(regm & bit))
            continue;
        regm &= ~bit;

        // If we don't need this CSE, and the register already
        // holds a CSE that we do need, don't mark the new one
        const bool keepExisting =
            (regcon.cse.mval & bit) && regcon.cse.value[i] != e &&
            !opsflag && (regcon.cse.mops & bit);
        if (keepExisting)
            continue;

        regcon.cse.mval |= bit;
        if (opsflag)
            regcon.cse.mops |= bit;
        //printf("cssave set: regcon.cse.value[%s] = %p\n",regstring[i],e);
        regcon.cse.value[i] = e;
        saved = true;
    }
    return saved;
}

/*************************************
 * Determine if a computation should be done into a register.
 * Returns:
 *      true if e should be evaluated into a register
 */

bool evalinregister(elem *e)
{
    if (config.exe == EX_WIN64 && e.Eoper == OPrelconst)
        return true;

    // Not a CSE: no need to evaluate it in a register
    if (e.Ecount == 0)
        return false;

    // operators are always in register
    if (!OTleaf(e.Eoper))
        return true;

    // Need to rethink this code if float or double can be CSE'd
    uint sz = tysize(e.Ety);

    if (e.Ecount == e.Ecomsub)
    {
        // elem is a CSE that needs to be generated
        if ((I32 || I64) &&
            //pass == PASSfinal && // bug 8987
            sz <= REGSIZE)
        {
            regm_t avail = allregs & ~regcon.mvar;
            if (sz == 1)
                avail &= BYTEREGS;

            // Need to be at least 3 registers available, as
            // addressing modes can use up 2. Clear the two lowest
            // set bits and see whether anything is left.
            avail &= avail - 1;
            avail &= avail - 1;
            if (avail)
                return true;
        }
        return false;
    }

    /* Elem is now a CSE that might have been generated. If so, and */
    /* it's in a register already, the computation should be done   */
    /* using that register.                                         */
    regm_t emask = 0;
    foreach (i, held; regcon.cse.value)
    {
        if (held == e)
            emask |= mask(cast(uint)i);
    }
    emask &= regcon.cse.mval;       // mask of available CSEs

    if (sz <= REGSIZE)
        return emask != 0;          /* the CSE is in a register     */
    if (sz <= 2 * REGSIZE)
        return (emask & mMSW) && (emask & mLSW);
    return true;                    /* cop-out for now              */
}

/*******************************************************
 * Return mask of scratch registers.
 * Outside of PASSfinal the result is always 0.
 */

regm_t getscratch()
{
    if (pass != PASSfinal)
        return 0;

    const regm_t busy = regcon.mvar | regcon.mpvar | regcon.cse.mval |
                        regcon.immed.mval | regcon.params | mfuncreg;
    return allregs & ~busy;
}

/******************************
 * Evaluate an elem that is a common subexp that has been encountered
 * before.
 * Look first to see if it is already in a register.
 */

private void comsub(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    tym_t tym;
    regm_t regm,emask;
    reg_t reg;
    uint byte_,sz;

    //printf("comsub(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem_debug(e);

    debug
    {
        if (e.Ecomsub > e.Ecount)
            elem_print(e);
    }

    assert(e.Ecomsub <= e.Ecount);

    if (*pretregs == 0)        // no possible side effects anyway
    {
        return;
    }

    /* First construct a mask, emask, of all the registers that
     * have the right contents.
2524 */ 2525 emask = 0; 2526 for (uint i = 0; i < regcon.cse.value.length; i++) 2527 { 2528 //dbg_printf("regcon.cse.value[%d] = %p\n",i,regcon.cse.value[i]); 2529 if (regcon.cse.value[i] == e) // if contents are right 2530 emask |= mask(i); // turn on bit for reg 2531 } 2532 emask &= regcon.cse.mval; // make sure all bits are valid 2533 2534 if (emask & XMMREGS && *pretregs == mPSW) 2535 { } 2536 else if (tyxmmreg(e.Ety) && config.fpxmmregs) 2537 { 2538 if (*pretregs & (mST0 | mST01)) 2539 { 2540 regm_t retregs = *pretregs & mST0 ? XMMREGS : mXMM0 | mXMM1; 2541 comsub(cdb, e, &retregs); 2542 fixresult(cdb,e,retregs,pretregs); 2543 return; 2544 } 2545 } 2546 else if (tyfloating(e.Ety) && config.inline8087) 2547 { 2548 comsub87(cdb,e,pretregs); 2549 return; 2550 } 2551 2552 2553 /* create mask of CSEs */ 2554 regm_t csemask = CSE.mask(e); 2555 csemask &= ~emask; // stuff already in registers 2556 2557 debug if (debugw) 2558 { 2559 printf("comsub(e=%p): *pretregs=%s, emask=%s, csemask=%s, regcon.cse.mval=%s, regcon.mvar=%s\n", 2560 e,regm_str(*pretregs),regm_str(emask),regm_str(csemask), 2561 regm_str(regcon.cse.mval),regm_str(regcon.mvar)); 2562 if (regcon.cse.mval & 1) 2563 elem_print(regcon.cse.value[0]); 2564 } 2565 2566 tym = tybasic(e.Ety); 2567 sz = _tysize[tym]; 2568 byte_ = sz == 1; 2569 2570 if (sz <= REGSIZE || (tyxmmreg(tym) && config.fpxmmregs)) // if data will fit in one register 2571 { 2572 /* First see if it is already in a correct register */ 2573 2574 regm = emask & *pretregs; 2575 if (regm == 0) 2576 regm = emask; /* try any other register */ 2577 if (regm) /* if it's in a register */ 2578 { 2579 if (!OTleaf(e.Eoper) || !(regm & regcon.mvar) || (*pretregs & regcon.mvar) == *pretregs) 2580 { 2581 regm = mask(findreg(regm)); 2582 fixresult(cdb,e,regm,pretregs); 2583 return; 2584 } 2585 } 2586 2587 if (OTleaf(e.Eoper)) /* if not op or func */ 2588 goto reload; /* reload data */ 2589 2590 foreach (ref cse; CSE.filter(e)) 2591 { 2592 regm_t retregs; 
2593 2594 if (cse.flags & CSEsimple) 2595 { 2596 retregs = *pretregs; 2597 if (byte_ && !(retregs & BYTEREGS)) 2598 retregs = BYTEREGS; 2599 else if (!(retregs & allregs)) 2600 retregs = allregs; 2601 allocreg(cdb,&retregs,®,tym); 2602 code *cr = &cse.csimple; 2603 cr.setReg(reg); 2604 if (I64 && reg >= 4 && tysize(cse.e.Ety) == 1) 2605 cr.Irex |= REX; 2606 cdb.gen(cr); 2607 goto L10; 2608 } 2609 else 2610 { 2611 reflocal = true; 2612 cse.flags |= CSEload; 2613 if (*pretregs == mPSW) // if result in CCs only 2614 { 2615 if (config.fpxmmregs && (tyxmmreg(cse.e.Ety) || tyvector(cse.e.Ety))) 2616 { 2617 retregs = XMMREGS; 2618 allocreg(cdb,&retregs,®,tym); 2619 gen_loadcse(cdb, cse.e.Ety, reg, cse.slot); 2620 regcon.cse.mval |= mask(reg); // cs is in a reg 2621 regcon.cse.value[reg] = e; 2622 fixresult(cdb,e,retregs,pretregs); 2623 } 2624 else 2625 { 2626 // CMP cs[BP],0 2627 gen_testcse(cdb, cse.e.Ety, sz, cse.slot); 2628 } 2629 } 2630 else 2631 { 2632 retregs = *pretregs; 2633 if (byte_ && !(retregs & BYTEREGS)) 2634 retregs = BYTEREGS; 2635 allocreg(cdb,&retregs,®,tym); 2636 gen_loadcse(cdb, cse.e.Ety, reg, cse.slot); 2637 L10: 2638 regcon.cse.mval |= mask(reg); // cs is in a reg 2639 regcon.cse.value[reg] = e; 2640 fixresult(cdb,e,retregs,pretregs); 2641 } 2642 } 2643 return; 2644 } 2645 2646 debug 2647 { 2648 printf("couldn't find cse e = %p, pass = %d\n",e,pass); 2649 elem_print(e); 2650 } 2651 assert(0); /* should have found it */ 2652 } 2653 else /* reg pair is req'd */ 2654 if (sz <= 2 * REGSIZE) 2655 { 2656 reg_t msreg,lsreg; 2657 2658 /* see if we have both */ 2659 if (!((emask | csemask) & mMSW && (emask | csemask) & (mLSW | mBP))) 2660 { /* we don't have both */ 2661 debug if (!OTleaf(e.Eoper)) 2662 { 2663 printf("e = %p, op = x%x, emask = %s, csemask = %s\n", 2664 e,e.Eoper,regm_str(emask),regm_str(csemask)); 2665 //printf("mMSW = x%x, mLSW = x%x\n", mMSW, mLSW); 2666 elem_print(e); 2667 } 2668 2669 assert(OTleaf(e.Eoper)); /* must have both for 
operators */ 2670 goto reload; 2671 } 2672 2673 /* Look for right vals in any regs */ 2674 regm = *pretregs & mMSW; 2675 if (emask & regm) 2676 msreg = findreg(emask & regm); 2677 else if (emask & mMSW) 2678 msreg = findregmsw(emask); 2679 else /* reload from cse array */ 2680 { 2681 if (!regm) 2682 regm = mMSW & ALLREGS; 2683 allocreg(cdb,®m,&msreg,TYint); 2684 loadcse(cdb,e,msreg,mMSW); 2685 } 2686 2687 regm = *pretregs & (mLSW | mBP); 2688 if (emask & regm) 2689 lsreg = findreg(emask & regm); 2690 else if (emask & (mLSW | mBP)) 2691 lsreg = findreglsw(emask); 2692 else 2693 { 2694 if (!regm) 2695 regm = mLSW; 2696 allocreg(cdb,®m,&lsreg,TYint); 2697 loadcse(cdb,e,lsreg,mLSW | mBP); 2698 } 2699 2700 regm = mask(msreg) | mask(lsreg); /* mask of result */ 2701 fixresult(cdb,e,regm,pretregs); 2702 return; 2703 } 2704 else if (tym == TYdouble || tym == TYdouble_alias) // double 2705 { 2706 assert(I16); 2707 if (((csemask | emask) & DOUBLEREGS_16) == DOUBLEREGS_16) 2708 { 2709 static const reg_t[4] dblreg = [ BX,DX,NOREG,CX ]; // duplicate of one in cod4.d 2710 for (reg = 0; reg != NOREG; reg = dblreg[reg]) 2711 { 2712 assert(cast(int) reg >= 0 && reg <= 7); 2713 if (mask(reg) & csemask) 2714 loadcse(cdb,e,reg,mask(reg)); 2715 } 2716 regm = DOUBLEREGS_16; 2717 fixresult(cdb,e,regm,pretregs); 2718 return; 2719 } 2720 if (OTleaf(e.Eoper)) goto reload; 2721 2722 debug 2723 printf("e = %p, csemask = %s, emask = %s\n",e,regm_str(csemask),regm_str(emask)); 2724 2725 assert(0); 2726 } 2727 else 2728 { 2729 debug 2730 printf("e = %p, tym = x%x\n",e,tym); 2731 2732 assert(0); 2733 } 2734 2735 reload: /* reload result from memory */ 2736 switch (e.Eoper) 2737 { 2738 case OPrelconst: 2739 cdrelconst(cdb,e,pretregs); 2740 break; 2741 2742 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 2743 { 2744 case OPgot: 2745 cdgot(cdb,e,pretregs); 2746 break; 2747 } 2748 default: 2749 if (*pretregs == mPSW && 2750 
config.fpxmmregs && 2751 (tyxmmreg(tym) || tysimd(tym))) 2752 { 2753 regm_t retregs = XMMREGS | mPSW; 2754 loaddata(cdb,e,&retregs); 2755 cssave(e,retregs,false); 2756 return; 2757 } 2758 loaddata(cdb,e,pretregs); 2759 break; 2760 } 2761 cssave(e,*pretregs,false); 2762 } 2763 2764 2765 /***************************** 2766 * Load reg from cse save area on stack. 2767 */ 2768 2769 private void loadcse(ref CodeBuilder cdb,elem *e,reg_t reg,regm_t regm) 2770 { 2771 foreach (ref cse; CSE.filter(e)) 2772 { 2773 //printf("CSE[%d] = %p, regm = %s\n", i, cse.e, regm_str(cse.regm)); 2774 if (cse.regm & regm) 2775 { 2776 reflocal = true; 2777 cse.flags |= CSEload; /* it was loaded */ 2778 regcon.cse.value[reg] = e; 2779 regcon.cse.mval |= mask(reg); 2780 getregs(cdb,mask(reg)); 2781 gen_loadcse(cdb, cse.e.Ety, reg, cse.slot); 2782 return; 2783 } 2784 } 2785 debug 2786 { 2787 printf("loadcse(e = %p, reg = %d, regm = %s)\n",e,reg,regm_str(regm)); 2788 elem_print(e); 2789 } 2790 assert(0); 2791 } 2792 2793 /*************************** 2794 * Generate code sequence for an elem. 2795 * Input: 2796 * pretregs = mask of possible registers to return result in 2797 * Note: longs are in AX,BX or CX,DX or SI,DI 2798 * doubles are AX,BX,CX,DX only 2799 * constflag = 1 for user of result will not modify the 2800 * registers returned in *pretregs. 2801 * 2 for freenode() not called. 
2802 * Output: 2803 * *pretregs mask of registers result is returned in 2804 * Returns: 2805 * pointer to code sequence generated 2806 */ 2807 2808 void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op) 2809 { 2810 (*cdxxx[op])(cdb,e,pretregs); 2811 } 2812 2813 // jump table 2814 private extern (C++) __gshared nothrow void function (ref CodeBuilder,elem *,regm_t *)[OPMAX] cdxxx = 2815 [ 2816 OPunde: &cderr, 2817 OPadd: &cdorth, 2818 OPmul: &cdmul, 2819 OPand: &cdorth, 2820 OPmin: &cdorth, 2821 OPnot: &cdnot, 2822 OPcom: &cdcom, 2823 OPcond: &cdcond, 2824 OPcomma: &cdcomma, 2825 OPremquo: &cddiv, 2826 OPdiv: &cddiv, 2827 OPmod: &cddiv, 2828 OPxor: &cdorth, 2829 OPstring: &cderr, 2830 OPrelconst: &cdrelconst, 2831 OPinp: &cdport, 2832 OPoutp: &cdport, 2833 OPasm: &cdasm, 2834 OPinfo: &cdinfo, 2835 OPdctor: &cddctor, 2836 OPddtor: &cdddtor, 2837 OPctor: &cdctor, 2838 OPdtor: &cddtor, 2839 OPmark: &cdmark, 2840 OPvoid: &cdvoid, 2841 OPhalt: &cdhalt, 2842 OPnullptr: &cderr, 2843 OPpair: &cdpair, 2844 OPrpair: &cdpair, 2845 2846 OPor: &cdorth, 2847 OPoror: &cdloglog, 2848 OPandand: &cdloglog, 2849 OProl: &cdshift, 2850 OPror: &cdshift, 2851 OPshl: &cdshift, 2852 OPshr: &cdshift, 2853 OPashr: &cdshift, 2854 OPbit: &cderr, 2855 OPind: &cdind, 2856 OPaddr: &cderr, 2857 OPneg: &cdneg, 2858 OPuadd: &cderr, 2859 OPabs: &cdabs, 2860 OPtoprec: &cdtoprec, 2861 OPsqrt: &cdneg, 2862 OPsin: &cdneg, 2863 OPcos: &cdneg, 2864 OPscale: &cdscale, 2865 OPyl2x: &cdscale, 2866 OPyl2xp1: &cdscale, 2867 OPcmpxchg: &cdcmpxchg, 2868 OPrint: &cdneg, 2869 OPrndtol: &cdrndtol, 2870 OPstrlen: &cdstrlen, 2871 OPstrcpy: &cdstrcpy, 2872 OPmemcpy: &cdmemcpy, 2873 OPmemset: &cdmemset, 2874 OPstrcat: &cderr, 2875 OPstrcmp: &cdstrcmp, 2876 OPmemcmp: &cdmemcmp, 2877 OPsetjmp: &cdsetjmp, 2878 OPnegass: &cdaddass, 2879 OPpreinc: &cderr, 2880 OPpredec: &cderr, 2881 OPstreq: &cdstreq, 2882 OPpostinc: &cdpost, 2883 OPpostdec: &cdpost, 2884 OPeq: &cdeq, 2885 OPaddass: &cdaddass, 2886 OPminass: 
&cdaddass, 2887 OPmulass: &cdmulass, 2888 OPdivass: &cddivass, 2889 OPmodass: &cddivass, 2890 OPshrass: &cdshass, 2891 OPashrass: &cdshass, 2892 OPshlass: &cdshass, 2893 OPandass: &cdaddass, 2894 OPxorass: &cdaddass, 2895 OPorass: &cdaddass, 2896 2897 OPle: &cdcmp, 2898 OPgt: &cdcmp, 2899 OPlt: &cdcmp, 2900 OPge: &cdcmp, 2901 OPeqeq: &cdcmp, 2902 OPne: &cdcmp, 2903 2904 OPunord: &cdcmp, 2905 OPlg: &cdcmp, 2906 OPleg: &cdcmp, 2907 OPule: &cdcmp, 2908 OPul: &cdcmp, 2909 OPuge: &cdcmp, 2910 OPug: &cdcmp, 2911 OPue: &cdcmp, 2912 OPngt: &cdcmp, 2913 OPnge: &cdcmp, 2914 OPnlt: &cdcmp, 2915 OPnle: &cdcmp, 2916 OPord: &cdcmp, 2917 OPnlg: &cdcmp, 2918 OPnleg: &cdcmp, 2919 OPnule: &cdcmp, 2920 OPnul: &cdcmp, 2921 OPnuge: &cdcmp, 2922 OPnug: &cdcmp, 2923 OPnue: &cdcmp, 2924 2925 OPvp_fp: &cdcnvt, 2926 OPcvp_fp: &cdcnvt, 2927 OPoffset: &cdlngsht, 2928 OPnp_fp: &cdshtlng, 2929 OPnp_f16p: &cdfar16, 2930 OPf16p_np: &cdfar16, 2931 2932 OPs16_32: &cdshtlng, 2933 OPu16_32: &cdshtlng, 2934 OPd_s32: &cdcnvt, 2935 OPb_8: &cdcnvt, 2936 OPs32_d: &cdcnvt, 2937 OPd_s16: &cdcnvt, 2938 OPs16_d: &cdcnvt, 2939 OPd_u16: &cdcnvt, 2940 OPu16_d: &cdcnvt, 2941 OPd_u32: &cdcnvt, 2942 OPu32_d: &cdcnvt, 2943 OP32_16: &cdlngsht, 2944 OPd_f: &cdcnvt, 2945 OPf_d: &cdcnvt, 2946 OPd_ld: &cdcnvt, 2947 OPld_d: &cdcnvt, 2948 OPc_r: &cdconvt87, 2949 OPc_i: &cdconvt87, 2950 OPu8_16: &cdbyteint, 2951 OPs8_16: &cdbyteint, 2952 OP16_8: &cdlngsht, 2953 OPu32_64: &cdshtlng, 2954 OPs32_64: &cdshtlng, 2955 OP64_32: &cdlngsht, 2956 OPu64_128: &cdshtlng, 2957 OPs64_128: &cdshtlng, 2958 OP128_64: &cdlngsht, 2959 OPmsw: &cdmsw, 2960 2961 OPd_s64: &cdcnvt, 2962 OPs64_d: &cdcnvt, 2963 OPd_u64: &cdcnvt, 2964 OPu64_d: &cdcnvt, 2965 OPld_u64: &cdcnvt, 2966 OPparam: &cderr, 2967 OPsizeof: &cderr, 2968 OParrow: &cderr, 2969 OParrowstar: &cderr, 2970 OPcolon: &cderr, 2971 OPcolon2: &cderr, 2972 OPbool: &cdnot, 2973 OPcall: &cdfunc, 2974 OPucall: &cdfunc, 2975 OPcallns: &cdfunc, 2976 OPucallns: &cdfunc, 2977 OPstrpar: &cderr, 2978 
OPstrctor: &cderr, 2979 OPstrthis: &cdstrthis, 2980 OPconst: &cderr, 2981 OPvar: &cderr, 2982 OPnew: &cderr, 2983 OPanew: &cderr, 2984 OPdelete: &cderr, 2985 OPadelete: &cderr, 2986 OPbrack: &cderr, 2987 OPframeptr: &cdframeptr, 2988 OPgot: &cdgot, 2989 2990 OPbsf: &cdbscan, 2991 OPbsr: &cdbscan, 2992 OPbtst: &cdbtst, 2993 OPbt: &cdbt, 2994 OPbtc: &cdbt, 2995 OPbtr: &cdbt, 2996 OPbts: &cdbt, 2997 2998 OPbswap: &cdbswap, 2999 OPpopcnt: &cdpopcnt, 3000 OPvector: &cdvector, 3001 OPvecsto: &cdvecsto, 3002 OPvecfill: &cdvecfill, 3003 OPva_start: &cderr, 3004 OPprefetch: &cdprefetch, 3005 ]; 3006 3007 3008 void codelem(ref CodeBuilder cdb,elem *e,regm_t *pretregs,uint constflag) 3009 { 3010 Symbol *s; 3011 3012 debug if (debugw) 3013 { 3014 printf("+codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs)); 3015 WROP(e.Eoper); 3016 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 3017 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 3018 printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub); 3019 } 3020 3021 assert(e); 3022 elem_debug(e); 3023 if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops) 3024 { 3025 debug 3026 { 3027 printf("+codelem(e=%p,*pretregs=%s) ", e, regm_str(*pretregs)); 3028 elem_print(e); 3029 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 3030 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 3031 printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub); 3032 } 3033 assert(0); 3034 } 3035 3036 if (!(constflag & 1) && *pretregs & (mES | ALLREGS | mBP | XMMREGS) & ~regcon.mvar) 3037 *pretregs &= ~regcon.mvar; /* can't use register vars */ 3038 3039 uint op = e.Eoper; 3040 if (e.Ecount && e.Ecount != e.Ecomsub) // if common subexp 3041 { 3042 comsub(cdb,e,pretregs); 3043 goto L1; 3044 } 3045 3046 if (configv.addlinenumbers && e.Esrcpos.Slinnum) 3047 cdb.genlinnum(e.Esrcpos); 3048 3049 switch (op) 3050 { 3051 default: 3052 if (e.Ecount) /* if common subexp */ 3053 { 3054 /* 
if no return value */ 3055 if ((*pretregs & (mSTACK | mES | ALLREGS | mBP | XMMREGS)) == 0) 3056 { 3057 if (*pretregs & (mST0 | mST01)) 3058 { 3059 //printf("generate ST0 comsub for:\n"); 3060 //elem_print(e); 3061 3062 regm_t retregs = *pretregs & mST0 ? mXMM0 : mXMM0|mXMM1; 3063 (*cdxxx[op])(cdb,e,&retregs); 3064 cssave(e,retregs,!OTleaf(op)); 3065 fixresult(cdb, e, retregs, pretregs); 3066 goto L1; 3067 } 3068 if (tysize(e.Ety) == 1) 3069 *pretregs |= BYTEREGS; 3070 else if ((tyxmmreg(e.Ety) || tysimd(e.Ety)) && config.fpxmmregs) 3071 *pretregs |= XMMREGS; 3072 else if (tybasic(e.Ety) == TYdouble || tybasic(e.Ety) == TYdouble_alias) 3073 *pretregs |= DOUBLEREGS; 3074 else 3075 *pretregs |= ALLREGS; /* make one */ 3076 } 3077 3078 /* BUG: For CSEs, make sure we have both an MSW */ 3079 /* and an LSW specified in *pretregs */ 3080 } 3081 assert(op <= OPMAX); 3082 (*cdxxx[op])(cdb,e,pretregs); 3083 break; 3084 3085 case OPrelconst: 3086 cdrelconst(cdb,e,pretregs); 3087 break; 3088 3089 case OPvar: 3090 if (constflag & 1 && (s = e.EV.Vsym).Sfl == FLreg && 3091 (s.Sregm & *pretregs) == s.Sregm) 3092 { 3093 if (tysize(e.Ety) <= REGSIZE && tysize(s.Stype.Tty) == 2 * REGSIZE) 3094 *pretregs &= mPSW | (s.Sregm & mLSW); 3095 else 3096 *pretregs &= mPSW | s.Sregm; 3097 } 3098 goto case OPconst; 3099 3100 case OPconst: 3101 if (*pretregs == 0 && (e.Ecount >= 3 || e.Ety & mTYvolatile)) 3102 { 3103 switch (tybasic(e.Ety)) 3104 { 3105 case TYbool: 3106 case TYchar: 3107 case TYschar: 3108 case TYuchar: 3109 *pretregs |= BYTEREGS; 3110 break; 3111 3112 case TYnref: 3113 case TYnptr: 3114 case TYsptr: 3115 case TYcptr: 3116 case TYfgPtr: 3117 case TYimmutPtr: 3118 case TYsharePtr: 3119 case TYrestrictPtr: 3120 *pretregs |= I16 ? 
IDXREGS : ALLREGS; 3121 break; 3122 3123 case TYshort: 3124 case TYushort: 3125 case TYint: 3126 case TYuint: 3127 case TYlong: 3128 case TYulong: 3129 case TYllong: 3130 case TYullong: 3131 case TYcent: 3132 case TYucent: 3133 case TYfptr: 3134 case TYhptr: 3135 case TYvptr: 3136 *pretregs |= ALLREGS; 3137 break; 3138 3139 default: 3140 break; 3141 } 3142 } 3143 loaddata(cdb,e,pretregs); 3144 break; 3145 } 3146 cssave(e,*pretregs,!OTleaf(op)); 3147 L1: 3148 if (!(constflag & 2)) 3149 freenode(e); 3150 3151 debug if (debugw) 3152 { 3153 printf("-codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs)); 3154 WROP(op); 3155 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 3156 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 3157 } 3158 } 3159 3160 /******************************* 3161 * Same as codelem(), but do not destroy the registers in keepmsk. 3162 * Use scratch registers as much as possible, then use stack. 3163 * Input: 3164 * constflag true if user of result will not modify the 3165 * registers returned in *pretregs. 
3166 */ 3167 3168 void scodelem(ref CodeBuilder cdb, elem *e,regm_t *pretregs,regm_t keepmsk,bool constflag) 3169 { 3170 regm_t touse; 3171 3172 debug if (debugw) 3173 printf("+scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n", 3174 e,regm_str(*pretregs),regm_str(keepmsk),constflag); 3175 3176 elem_debug(e); 3177 if (constflag) 3178 { 3179 regm_t regm; 3180 reg_t reg; 3181 3182 if (isregvar(e,®m,®) && // if e is a register variable 3183 (regm & *pretregs) == regm && // in one of the right regs 3184 e.EV.Voffset == 0 3185 ) 3186 { 3187 uint sz1 = tysize(e.Ety); 3188 uint sz2 = tysize(e.EV.Vsym.Stype.Tty); 3189 if (sz1 <= REGSIZE && sz2 > REGSIZE) 3190 regm &= mLSW | XMMREGS; 3191 fixresult(cdb,e,regm,pretregs); 3192 cssave(e,regm,0); 3193 freenode(e); 3194 3195 debug if (debugw) 3196 printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n", 3197 e,regm_str(*pretregs),regm_str(keepmsk),constflag); 3198 3199 return; 3200 } 3201 } 3202 regm_t overlap = msavereg & keepmsk; 3203 msavereg |= keepmsk; /* add to mask of regs to save */ 3204 regm_t oldregcon = regcon.cse.mval; 3205 regm_t oldregimmed = regcon.immed.mval; 3206 regm_t oldmfuncreg = mfuncreg; /* remember old one */ 3207 mfuncreg = (XMMREGS | mBP | mES | ALLREGS) & ~regcon.mvar; 3208 uint stackpushsave = stackpush; 3209 char calledafuncsave = calledafunc; 3210 calledafunc = 0; 3211 CodeBuilder cdbx; cdbx.ctor(); 3212 codelem(cdbx,e,pretregs,constflag); // generate code for the elem 3213 3214 regm_t tosave = keepmsk & ~msavereg; /* registers to save */ 3215 if (tosave) 3216 { 3217 cgstate.stackclean++; 3218 genstackclean(cdbx,stackpush - stackpushsave,*pretregs | msavereg); 3219 cgstate.stackclean--; 3220 } 3221 3222 /* Assert that no new CSEs are generated that are not reflected */ 3223 /* in mfuncreg. 
*/ 3224 debug if ((mfuncreg & (regcon.cse.mval & ~oldregcon)) != 0) 3225 printf("mfuncreg %s, regcon.cse.mval %s, oldregcon %s, regcon.mvar %s\n", 3226 regm_str(mfuncreg),regm_str(regcon.cse.mval),regm_str(oldregcon),regm_str(regcon.mvar)); 3227 3228 assert((mfuncreg & (regcon.cse.mval & ~oldregcon)) == 0); 3229 3230 /* bugzilla 3521 3231 * The problem is: 3232 * reg op (reg = exp) 3233 * where reg must be preserved (in keepregs) while the expression to be evaluated 3234 * must change it. 3235 * The only solution is to make this variable not a register. 3236 */ 3237 if (regcon.mvar & tosave) 3238 { 3239 //elem_print(e); 3240 //printf("test1: regcon.mvar %s tosave %s\n", regm_str(regcon.mvar), regm_str(tosave)); 3241 cgreg_unregister(regcon.mvar & tosave); 3242 } 3243 3244 /* which registers can we use to save other registers in? */ 3245 if (config.flags4 & CFG4space || // if optimize for space 3246 config.target_cpu >= TARGET_80486) // PUSH/POP ops are 1 cycle 3247 touse = 0; // PUSH/POP pairs are always shorter 3248 else 3249 { 3250 touse = mfuncreg & allregs & ~(msavereg | oldregcon | regcon.cse.mval); 3251 /* Don't use registers we'll have to save/restore */ 3252 touse &= ~(fregsaved & oldmfuncreg); 3253 /* Don't use registers that have constant values in them, since 3254 the code generated might have used the value. 
3255 */ 3256 touse &= ~oldregimmed; 3257 } 3258 3259 CodeBuilder cdbs1; cdbs1.ctor(); 3260 code *cs2 = null; 3261 int adjesp = 0; 3262 3263 for (uint i = 0; tosave; i++) 3264 { 3265 regm_t mi = mask(i); 3266 3267 assert(i < REGMAX); 3268 if (mi & tosave) /* i = register to save */ 3269 { 3270 if (touse) /* if any scratch registers */ 3271 { 3272 uint j; 3273 for (j = 0; j < 8; j++) 3274 { 3275 regm_t mj = mask(j); 3276 3277 if (touse & mj) 3278 { 3279 genmovreg(cdbs1,j,i); 3280 cs2 = cat(genmovreg(i,j),cs2); 3281 touse &= ~mj; 3282 mfuncreg &= ~mj; 3283 regcon.used |= mj; 3284 break; 3285 } 3286 } 3287 assert(j < 8); 3288 } 3289 else // else use memory 3290 { 3291 CodeBuilder cdby; cdby.ctor(); 3292 uint size = gensaverestore(mask(i), cdbs1, cdby); 3293 cs2 = cat(cdby.finish(),cs2); 3294 if (size) 3295 { 3296 stackchanged = 1; 3297 adjesp += size; 3298 } 3299 } 3300 getregs(cdbx,mi); 3301 tosave &= ~mi; 3302 } 3303 } 3304 CodeBuilder cdbs2; cdbs2.ctor(); 3305 if (adjesp) 3306 { 3307 // If this is done an odd number of times, it 3308 // will throw off the 8 byte stack alignment. 3309 // We should *only* worry about this if a function 3310 // was called in the code generation by codelem(). 
3311 int sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1); 3312 if (calledafunc && !I16 && sz && (STACKALIGN >= 16 || config.flags4 & CFG4stackalign)) 3313 { 3314 regm_t mval_save = regcon.immed.mval; 3315 regcon.immed.mval = 0; // prevent reghasvalue() optimizations 3316 // because c hasn't been executed yet 3317 cod3_stackadj(cdbs1, sz); 3318 regcon.immed.mval = mval_save; 3319 cdbs1.genadjesp(sz); 3320 3321 cod3_stackadj(cdbs2, -sz); 3322 cdbs2.genadjesp(-sz); 3323 } 3324 cdbs2.append(cs2); 3325 3326 3327 cdbs1.genadjesp(adjesp); 3328 cdbs2.genadjesp(-adjesp); 3329 } 3330 else 3331 cdbs2.append(cs2); 3332 3333 calledafunc |= calledafuncsave; 3334 msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save */ 3335 mfuncreg &= oldmfuncreg; /* update original */ 3336 3337 debug if (debugw) 3338 printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n", 3339 e,regm_str(*pretregs),regm_str(keepmsk),constflag); 3340 3341 cdb.append(cdbs1); 3342 cdb.append(cdbx); 3343 cdb.append(cdbs2); 3344 return; 3345 } 3346 3347 /********************************************* 3348 * Turn register mask into a string suitable for printing. 
3349 */ 3350 3351 const(char)* regm_str(regm_t rm) 3352 { 3353 enum NUM = 10; 3354 enum SMAX = 128; 3355 __gshared char[SMAX + 1][NUM] str; 3356 __gshared int i; 3357 3358 if (rm == 0) 3359 return "0"; 3360 if (rm == ALLREGS) 3361 return "ALLREGS"; 3362 if (rm == BYTEREGS) 3363 return "BYTEREGS"; 3364 if (rm == allregs) 3365 return "allregs"; 3366 if (rm == XMMREGS) 3367 return "XMMREGS"; 3368 char *p = str[i].ptr; 3369 if (++i == NUM) 3370 i = 0; 3371 *p = 0; 3372 for (size_t j = 0; j < 32; j++) 3373 { 3374 if (mask(cast(uint)j) & rm) 3375 { 3376 strcat(p,regstring[j]); 3377 rm &= ~mask(cast(uint)j); 3378 if (rm) 3379 strcat(p,"|"); 3380 } 3381 } 3382 if (rm) 3383 { char *s = p + strlen(p); 3384 sprintf(s,"x%02x",rm); 3385 } 3386 assert(strlen(p) <= SMAX); 3387 return strdup(p); 3388 } 3389 3390 /********************************* 3391 * Scan down comma-expressions. 3392 * Output: 3393 * *pe = first elem down right side that is not an OPcomma 3394 * Returns: 3395 * code generated for left branches of comma-expressions 3396 */ 3397 3398 void docommas(ref CodeBuilder cdb,elem **pe) 3399 { 3400 uint stackpushsave = stackpush; 3401 int stackcleansave = cgstate.stackclean; 3402 cgstate.stackclean = 0; 3403 elem* e = *pe; 3404 while (1) 3405 { 3406 if (configv.addlinenumbers && e.Esrcpos.Slinnum) 3407 { 3408 cdb.genlinnum(e.Esrcpos); 3409 //e.Esrcpos.Slinnum = 0; // don't do it twice 3410 } 3411 if (e.Eoper != OPcomma) 3412 break; 3413 regm_t retregs = 0; 3414 codelem(cdb,e.EV.E1,&retregs,true); 3415 elem* eold = e; 3416 e = e.EV.E2; 3417 freenode(eold); 3418 } 3419 *pe = e; 3420 assert(cgstate.stackclean == 0); 3421 cgstate.stackclean = stackcleansave; 3422 genstackclean(cdb,stackpush - stackpushsave,0); 3423 } 3424 3425 /************************** 3426 * For elems in regcon that don't match regconsave, 3427 * clear the corresponding bit in regcon.cse.mval. 3428 * Do same for regcon.immed. 
3429 */ 3430 3431 void andregcon(con_t *pregconsave) 3432 { 3433 regm_t m = ~1; 3434 for (int i = 0; i < REGMAX; i++) 3435 { 3436 if (pregconsave.cse.value[i] != regcon.cse.value[i]) 3437 regcon.cse.mval &= m; 3438 if (pregconsave.immed.value[i] != regcon.immed.value[i]) 3439 regcon.immed.mval &= m; 3440 m <<= 1; 3441 m |= 1; 3442 } 3443 //printf("regcon.cse.mval = %s, regconsave.mval = %s ",regm_str(regcon.cse.mval),regm_str(pregconsave.cse.mval)); 3444 regcon.used |= pregconsave.used; 3445 regcon.cse.mval &= pregconsave.cse.mval; 3446 regcon.immed.mval &= pregconsave.immed.mval; 3447 regcon.params &= pregconsave.params; 3448 //printf("regcon.cse.mval®con.cse.mops = %s, regcon.cse.mops = %s\n",regm_str(regcon.cse.mval & regcon.cse.mops), regm_str(regcon.cse.mops)); 3449 regcon.cse.mops &= regcon.cse.mval; 3450 } 3451 3452 }