/**
 * Top level code for the code generator.
 *
 * Copyright:   Copyright (C) 1985-1998 by Symantec
 *              Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cgcod.d, backend/cgcod.d)
 * Documentation:  https://dlang.org/phobos/dmd_backend_cgcod.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cgcod.d
 */

module dmd.backend.cgcod;

version = FRAMEPTR;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.cgcse;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.dlist;
import dmd.backend.dvec;
import dmd.backend.melf;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.outbuf;
import dmd.backend.rtlsym;
import dmd.backend.symtab;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

import dmd.backend.barray;

version (SCPP)
{
    import parser;
    import precomp;
}

extern (C++):

nothrow:

// Local declaration of the C library qsort() with a nothrow comparator type.
alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

// Compile-time boolean mirroring version(MARS), usable in `static if`.
version (MARS)
    enum MARS = true;
else
    enum MARS = false;

void dwarf_except_gentables(Funcsym *sfunc, uint startoffset, uint retoffset);
int REGSIZE();

/// Returns: a value with only bit number `m` set (i.e. `1 << m`)
private extern (D) uint mask(uint m) { return 1 << m; }


__gshared
{
bool floatreg;                  // true if floating register is required

int hasframe;                   // !=0 if this function has a stack frame
bool enforcealign;              // enforced stack alignment
targ_size_t spoff;
targ_size_t Foff;               // BP offset of floating register
targ_size_t CSoff;              // offset of common sub expressions
targ_size_t NDPoff;             // offset of saved 8087 registers
targ_size_t pushoff;            // offset of saved registers
bool pushoffuse;                // using pushoff
int BPoff;                      // offset from BP
int EBPtoESP;                   // add to EBP offset to get ESP offset
LocalSection Para;              // section of function parameters
LocalSection Auto;              // section of automatics and registers
LocalSection Fast;              // section of fastpar
LocalSection EEStack;           // offset of SCstack variables from ESP
LocalSection Alloca;            // data for alloca() temporary

REGSAVE regsave;

CGstate cgstate;                // state of code generator

regm_t BYTEREGS = BYTEREGS_INIT;
regm_t ALLREGS = ALLREGS_INIT;


/************************************
 * # of bytes that SP is beyond BP.
 */

uint stackpush;

int stackchanged;               /* set to !=0 if any use of the stack
                                   other than accessing parameters. Used
                                   to see if we can address parameters
                                   with ESP rather than EBP.
*/
int refparam;                   // !=0 if we referenced any parameters
int reflocal;                   // !=0 if we referenced any locals
bool anyiasm;                   // !=0 if any inline assembler
char calledafunc;               // !=0 if we called a function
char needframe;                 // if true, then we will need the frame
                                // pointer (BP for the 8088)
char gotref;                    // !=0 if the GOTsym was referenced
uint usednteh;                  // if !=0, then used NT exception handling
bool calledFinally;             // true if called a BC_finally block

/* Register contents    */
con_t regcon;

int pass;                       // PASSxxxx

private Symbol *retsym;         // set to symbol that should be placed in
                                // register AX

/****************************
 * Register masks.
 */

regm_t msavereg;        // Mask of registers that we would like to save.
                        // they are temporaries (set by scodelem())
regm_t mfuncreg;        // Mask of registers preserved by a function

regm_t allregs;         // ALLREGS optionally including mBP

int dfoidx;                     /* which block we are in                */

targ_size_t funcoffset;         // offset of start of function
targ_size_t prolog_allocoffset; // offset past adj of stack allocation
targ_size_t startoffset;        // size of function entry code
targ_size_t retoffset;          /* offset from start of func to ret code */
targ_size_t retsize;            /* size of function return              */

private regm_t lastretregs,last2retregs,last3retregs,last4retregs,last5retregs;

}

/*********************************
 * Generate code for a function.
 *
 * The per-block code generation is run one or more times (label `tryagain`):
 * after a non-final pass, cgreg_assign() is consulted, the generated block
 * code is freed and generation restarts with `pass` set to PASSreg or
 * PASSfinal. Once the final pass is done the prolog is built, block
 * addresses are assigned, jumps are shortened, and the code, switch tables
 * and exception tables are written out.
 *
 * Note at the end of this routine mfuncreg will contain the mask
 * of registers not affected by the function. Some minor optimization
 * possibilities are here.
 * Params:
 *      sfunc = function to generate code for; must be the current function
 *              (funcsym_p), and cseg must be its segment
 */

void codgen(Symbol *sfunc)
{
    bool flag;
    block *btry;

    // Register usage. If a bit is on, the corresponding register is live
    // in that basic block.

    //printf("codgen('%s')\n",funcsym_p.Sident.ptr);
    assert(sfunc == funcsym_p);
    assert(cseg == funcsym_p.Sseg);

    cgreg_init();
    CSE.initialize();
    tym_t functy = tybasic(sfunc.ty());
    cod3_initregs();
    allregs = ALLREGS;
    pass = PASSinitial;
    Alloca.init();
    anyiasm = 0;

    if (config.ehmethod == EHmethod.EH_DWARF)
    {
        /* The dwarf unwinder relies on the function epilog to exist
         */
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCexit)
                b.BC = BCret;
        }
    }

tryagain:
    debug
    if (debugr)
        printf("------------------ PASS%s -----------------\n",
            (pass == PASSinitial) ? "init".ptr : ((pass == PASSreg) ? "reg".ptr : "final".ptr));

    lastretregs = last2retregs = last3retregs = last4retregs = last5retregs = 0;

    // if no parameters, assume we don't need a stack frame
    needframe = 0;
    enforcealign = false;
    gotref = 0;
    stackchanged = 0;
    stackpush = 0;
    refparam = 0;
    calledafunc = 0;
    retsym = null;

    cgstate.stackclean = 1;
    cgstate.funcarg.init();
    cgstate.funcargtos = ~0;
    cgstate.accessedTLS = false;
    STACKALIGN = TARGET_STACKALIGN;

    regsave.reset();
    memset(global87.stack.ptr,0,global87.stack.sizeof);

    calledFinally = false;
    usednteh = 0;

    static if (MARS)
    {
        if (sfunc.Sfunc.Fflags3 & Fjmonitor &&
            config.exe & EX_windos)
            usednteh |= NTEHjmonitor;
    }
    else version (SCPP)
    {
        if (CPP)
        {
            if (config.exe == EX_WIN32 &&
                (sfunc.Stype.Tflags & TFemptyexc || sfunc.Stype.Texcspec))
                usednteh |= NTEHexcspec;
            except_reset();
        }
    }

    // Set on a trial basis, turning it off if anything might throw
    sfunc.Sfunc.Fflags3 |= Fnothrow;

    floatreg = false;
    assert(global87.stackused == 0);             /* nobody in 8087 stack         */

    CSE.start();
    memset(&regcon,0,regcon.sizeof);            // start with no known register contents
    regcon.cse.mval = regcon.cse.mops = 0;      // no common subs yet
    msavereg = 0;
    uint nretblocks = 0;
    mfuncreg = fregsaved;               // so we can see which are used
                                        // (bit is cleared each time
                                        //  we use one)
    for (block* b = startblock; b; b = b.Bnext)
    {
        memset(&b.Bregcon,0,b.Bregcon.sizeof);       // Clear out values in registers
        if (b.Belem)
            resetEcomsub(b.Belem);     // reset all the Ecomsubs
        if (b.BC == BCasm)
            anyiasm = 1;                // we have inline assembler
        if (b.BC == BCret || b.BC == BCretexp)
            nretblocks++;
    }

    if (!config.fulltypes || (config.flags4 & CFG4optimized))
    {
        // Collect the registers holding fastpar/shadowreg parameters, then
        // drop those occupied by parameters that live in a register (FLreg).
        regm_t noparams = 0;
        for (int i = 0; i < globsym.length; i++)
        {
            Symbol *s = globsym[i];
            s.Sflags &= ~SFLread;
            switch (s.Sclass)
            {
                case SCfastpar:
                case SCshadowreg:
                    regcon.params |= s.Spregm();
                    goto case SCparameter;

                case SCparameter:
                    if (s.Sfl == FLreg)
                        noparams |= s.Sregm;
                    break;

                default:
                    break;
            }
        }
        regcon.params &= ~noparams;
    }

    if (config.flags4 & CFG4optimized)
    {
        if (nretblocks == 0 &&                  // if no return blocks in function
            !(sfunc.ty() & mTYnaked))           // naked functions may have hidden ways of returning
            sfunc.Sflags |= SFLexit;            // mark function as never returning

        assert(dfo);

        cgreg_reset();
        for (dfoidx = 0; dfoidx < dfo.length; dfoidx++)
        {
            regcon.used = msavereg | regcon.cse.mval;   // registers already in use
            block* b = dfo[dfoidx];
            blcodgen(b);                        // gen code in depth-first order
            //printf("b.Bregcon.used = %s\n", regm_str(b.Bregcon.used));
            cgreg_used(dfoidx, b.Bregcon.used); // gather register used information
        }
    }
    else
    {
        pass = PASSfinal;
        for (block* b = startblock; b; b = b.Bnext)
            blcodgen(b);                // generate the code for each block
    }
    regcon.immed.mval = 0;
    assert(!regcon.cse.mops);           // should have all been used

    // See which variables we can put into registers
    if (pass != PASSfinal &&
        !anyiasm)                               // possible LEA or LES opcodes
    {
        allregs |= cod3_useBP();                // see if we can use EBP

        // If pic code, but EBX was never needed
        if (!(allregs & mask(PICREG)) && !gotref)
        {
            allregs |= mask(PICREG);            // EBX can now be used
            cgreg_assign(retsym);
            pass = PASSreg;
        }
        else if (cgreg_assign(retsym))          // if we found some registers
            pass = PASSreg;
        else
            pass = PASSfinal;
        // Discard the code from this pass and regenerate it
        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
        goto tryagain;
    }
    cgreg_term();

    version (SCPP)
    {
        if (CPP)
            cgcod_eh();
    }

    // See if we need to enforce a particular stack alignment
    foreach (i; 0 .. globsym.length)
    {
        Symbol *s = globsym[i];

        if (Symbol_Sisdead(s, anyiasm))
            continue;

        switch (s.Sclass)
        {
            case SCregister:
            case SCauto:
            case SCfastpar:
                if (s.Sfl == FLreg)
                    break;

                const sz = type_alignsize(s.Stype);
                if (sz > STACKALIGN && (I64 || config.exe == EX_OSX))
                {
                    STACKALIGN = sz;
                    enforcealign = true;
                }
                break;

            default:
                break;
        }
    }

    stackoffsets(globsym, false);  // compute final offsets of stack variables
    cod5_prol_epi();            // see where to place prolog/epilog
    CSE.finish();               // compute addresses and sizes of CSE saves

    if (configv.addlinenumbers)
        objmod.linnum(sfunc.Sfunc.Fstartline,sfunc.Sseg,Offset(sfunc.Sseg));

    // Otherwise, jmp's to startblock will execute the prolog again
    assert(!startblock.Bpred);

    CodeBuilder cdbprolog; cdbprolog.ctor();
    prolog(cdbprolog);           // gen function start code
    code *cprolog = cdbprolog.finish();
    if (cprolog)
        pinholeopt(cprolog,null);       // optimize

    funcoffset = Offset(sfunc.Sseg);
    targ_size_t coffset = Offset(sfunc.Sseg);

    if (eecontext.EEelem)
        genEEcode();

    for (block* b = startblock; b; b = b.Bnext)
    {
        // We couldn't do this before because localsize was unknown
        switch (b.BC)
        {
            case BCret:
                if (configv.addlinenumbers && b.Bsrcpos.Slinnum && !(sfunc.ty() & mTYnaked))
                {
                    CodeBuilder cdb; cdb.ctor();
                    cdb.append(b.Bcode);
                    cdb.genlinnum(b.Bsrcpos);
                    b.Bcode = cdb.finish();
                }
                goto case BCretexp;

            case BCretexp:
                epilog(b);
                break;

            default:
                if (b.Bflags & BFLepilog)
                    epilog(b);
                break;
        }
        assignaddr(b);                  // assign addresses
        pinholeopt(b.Bcode,b);          // do pinhole optimization
        if (b.Bflags & BFLprolog)       // do function prolog
        {
            startoffset = coffset + calcblksize(cprolog) - funcoffset;
            b.Bcode = cat(cprolog,b.Bcode);
        }
        cgsched_block(b);
        b.Bsize = calcblksize(b.Bcode);       // calculate block size
        if (b.Balign)
        {
            targ_size_t u = b.Balign - 1;
            coffset = (coffset + u) & ~u;
        }
        b.Boffset = coffset;           /* offset of this block         */
        coffset += b.Bsize;            /* offset of following block    */
    }

    debug
    debugw && printf("code addr complete\n");

    // Do jump optimization
    do
    {
        flag = false;
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.Bflags & BFLjmpoptdone)      /* if no more jmp opts for this blk */
                continue;
            int i = branch(b,0);            // see if jmp => jmp short
            if (i)                          // if any bytes saved
            {   targ_size_t offset;

                b.Bsize -= i;
                offset = b.Boffset + b.Bsize;
                // Recompute the offsets of all following blocks
                for (block* bn = b.Bnext; bn; bn = bn.Bnext)
                {
                    if (bn.Balign)
                    {   targ_size_t u = bn.Balign - 1;

                        offset = (offset + u) & ~u;
                    }
                    bn.Boffset = offset;
                    offset += bn.Bsize;
                }
                coffset = offset;
                flag = true;
            }
        }
        if (!I16 && !(config.flags4 & CFG4optimized))
            break;                      // use the long conditional jmps
    } while (flag);                     // loop till no more bytes saved

    debug
    debugw && printf("code jump optimization complete\n");

    version (MARS)
    {
        if (usednteh & NTEH_try)
        {
            // Do this before code is emitted because we patch some instructions
            nteh_filltables();
        }
    }

    // Compute starting offset for switch tables
    targ_size_t swoffset;
    int jmpseg = -1;
    if (config.flags & CFGromable)
    {
        jmpseg = 0;
        swoffset = coffset;
    }

    // Emit the generated code
    if (eecontext.EEcompile == 1)
    {
        codout(sfunc.Sseg,eecontext.EEcode);
        code_free(eecontext.EEcode);
        version (SCPP)
        {
            el_free(eecontext.EEelem);
        }
    }
    else
    {
        for (block* b = startblock; b; b = b.Bnext)
        {
            if (b.BC == BCjmptab || b.BC == BCswitch)
            {
                if (jmpseg == -1)
                {
                    jmpseg = objmod.jmpTableSegment(sfunc);
                    swoffset = Offset(jmpseg);
                }
                swoffset = _align(0,swoffset);
                b.Btableoffset = swoffset;     /* offset of sw tab */
                swoffset += b.Btablesize;
            }
            jmpaddr(b.Bcode);          /* assign jump addresses        */

            debug
            if (debugc)
            {
                printf("Boffset = x%x, Bsize = x%x, Coffset = x%x\n",
                    cast(int)b.Boffset,cast(int)b.Bsize,cast(int)Offset(sfunc.Sseg));
                if (b.Bcode)
                    printf( "First opcode of block is: %0x\n", b.Bcode.Iop );
            }

            if (b.Balign)
            {   uint u = b.Balign;
                uint nalign = (u - cast(uint)Offset(sfunc.Sseg)) & (u - 1);

                cod3_align_bytes(sfunc.Sseg, nalign);
            }
            assert(b.Boffset == Offset(sfunc.Sseg));

            version (SCPP)
            {
                if (CPP && !(config.exe == EX_WIN32))
                {
                    //printf("b = %p, index = %d\n",b,b.Bindex);
                    //except_index_set(b.Bindex);

                    if (btry != b.Btry)
                    {
                        btry = b.Btry;
                        except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset);
                    }
                    if (b.BC == BCtry)
                    {
                        btry = b;
                        except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset);
                    }
                }
            }

            codout(sfunc.Sseg,b.Bcode);   // output code
        }
        // The computed end offset must match what was actually emitted
        if (coffset != Offset(sfunc.Sseg))
        {
            debug
            printf("coffset = %d, Offset(sfunc.Sseg) = %d\n",cast(int)coffset,cast(int)Offset(sfunc.Sseg));

            assert(0);
        }
        sfunc.Ssize = Offset(sfunc.Sseg) - funcoffset;    // size of function

        static if (NTEXCEPTIONS || MARS)
        {
            version (MARS)
                const nteh = usednteh & NTEH_try;
            else static if (NTEXCEPTIONS)
                const nteh = usednteh & NTEHcpp;
            else
                enum nteh = true;
            if (nteh)
            {
                assert(!(config.flags & CFGromable));
                //printf("framehandleroffset = x%x, coffset = x%x\n",framehandleroffset,coffset);
                objmod.reftocodeseg(sfunc.Sseg,framehandleroffset,coffset);
            }
        }

        // Write out switch tables
        flag = false;                       // true if last active block was a ret
        for (block* b = startblock; b; b = b.Bnext)
        {
            switch (b.BC)
            {
                case BCjmptab:              /* if jump table                */
                    outjmptab(b);           /* write out jump table         */
                    goto Ldefault;

                case BCswitch:
                    outswitab(b);           /* write out switch table       */
                    goto Ldefault;

                case BCret:
                case BCretexp:
                    /* Compute offset to return code from start of function */
                    retoffset = b.Boffset + b.Bsize - retsize - funcoffset;
                    version (MARS)
                    {
                        /* Add 3 bytes to retoffset in case we have an exception
                         * handler. THIS PROBABLY NEEDS TO BE IN ANOTHER SPOT BUT
                         * IT FIXES THE PROBLEM HERE AS WELL.
                         */
                        if (usednteh & NTEH_try)
                            retoffset += 3;
                    }
                    flag = true;
                    break;

                default:
                Ldefault:
                    retoffset = b.Boffset + b.Bsize - funcoffset;
                    break;
            }
        }
        if (configv.addlinenumbers && !(sfunc.ty() & mTYnaked))
            /* put line number at end of function on the
               start of the last instruction
             */
            /* Instead, try offset to cleanup code  */
            if (retoffset < sfunc.Ssize)
                objmod.linnum(sfunc.Sfunc.Fendline,sfunc.Sseg,funcoffset + retoffset);

        static if (MARS)
        {
            if (config.exe == EX_WIN64)
                win64_pdata(sfunc);
        }

        static if (MARS)
        {
            if (usednteh & NTEH_try)
            {
                // NOTE(review): the code has already been written by codout()
                // at this point; only the tables are generated here.
                nteh_gentables(sfunc);
            }
            if (usednteh & (EHtry | EHcleanup) &&   // saw BCtry or BC_try or OPddtor
                config.ehmethod == EHmethod.EH_DM)
            {
                except_gentables();
            }
            if (config.ehmethod == EHmethod.EH_DWARF)
            {
                // Fstartblock is only set for the duration of the call
                sfunc.Sfunc.Fstartblock = startblock;
                dwarf_except_gentables(sfunc, cast(uint)startoffset, cast(uint)retoffset);
                sfunc.Sfunc.Fstartblock = null;
            }
        }

        version (SCPP)
        {
            // Write out frame handler
            if (NTEXCEPTIONS && usednteh & NTEHcpp)
            {
                nteh_framehandler(sfunc, except_gentables());
            }
            else
            {
                if (NTEXCEPTIONS && usednteh & NTEH_try)
                {
                    nteh_gentables(sfunc);
                }
                else
                {
                    if (CPP)
                        except_gentables();
                }
            }
        }

        for (block* b = startblock; b; b = b.Bnext)
        {
            code_free(b.Bcode);
            b.Bcode = null;
        }
    }

    // Mask of regs saved
    // BUG: do interrupt functions save BP?
    sfunc.Sregsaved = (functy == TYifunc) ?
cast(regm_t) mBP : (mfuncreg | fregsaved); 696 697 debug 698 if (global87.stackused != 0) 699 printf("stackused = %d\n",global87.stackused); 700 701 assert(global87.stackused == 0); /* nobody in 8087 stack */ 702 703 global87.save.dtor(); // clean up ndp save array 704 } 705 706 /********************************************* 707 * Align sections on the stack. 708 * base negative offset of section from frame pointer 709 * alignment alignment to use 710 * bias difference between where frame pointer points and the STACKALIGNed 711 * part of the stack 712 * Returns: 713 * base revised downward so it is aligned 714 */ 715 targ_size_t alignsection(targ_size_t base, uint alignment, int bias) 716 { 717 assert(cast(int)base <= 0); 718 if (alignment > STACKALIGN) 719 alignment = STACKALIGN; 720 if (alignment) 721 { 722 int sz = cast(int)(-base + bias); 723 assert(sz >= 0); 724 sz &= (alignment - 1); 725 if (sz) 726 base -= alignment - sz; 727 } 728 return base; 729 } 730 731 /******************************* 732 * Generate code for a function start. 
733 * Input: 734 * Offset(cseg) address of start of code 735 * Auto.alignment 736 * Output: 737 * Offset(cseg) adjusted for size of code generated 738 * EBPtoESP 739 * hasframe 740 * BPoff 741 */ 742 void prolog(ref CodeBuilder cdb) 743 { 744 bool enter; 745 746 //printf("cod3.prolog() %s, needframe = %d, Auto.alignment = %d\n", funcsym_p.Sident.ptr, needframe, Auto.alignment); 747 debug debugw && printf("funcstart()\n"); 748 regcon.immed.mval = 0; /* no values in registers yet */ 749 version (FRAMEPTR) 750 EBPtoESP = 0; 751 else 752 EBPtoESP = -REGSIZE; 753 hasframe = 0; 754 bool pushds = false; 755 BPoff = 0; 756 bool pushalloc = false; 757 tym_t tyf = funcsym_p.ty(); 758 tym_t tym = tybasic(tyf); 759 const farfunc = tyfarfunc(tym) != 0; 760 761 // Special Intel 64 bit ABI prolog setup for variadic functions 762 Symbol *sv64 = null; // set to __va_argsave 763 if (I64 && variadic(funcsym_p.Stype)) 764 { 765 /* The Intel 64 bit ABI scheme. 766 * abi_sysV_amd64.pdf 767 * Load arguments passed in registers into the varargs save area 768 * so they can be accessed by va_arg(). 
769 */ 770 /* Look for __va_argsave 771 */ 772 for (SYMIDX si = 0; si < globsym.length; si++) 773 { 774 Symbol *s = globsym[si]; 775 if (s.Sident[0] == '_' && strcmp(s.Sident.ptr, "__va_argsave") == 0) 776 { 777 if (!(s.Sflags & SFLdead)) 778 sv64 = s; 779 break; 780 } 781 } 782 } 783 784 if (config.flags & CFGalwaysframe || 785 funcsym_p.Sfunc.Fflags3 & Ffakeeh || 786 /* The exception stack unwinding mechanism relies on the EBP chain being intact, 787 * so need frame if function can possibly throw 788 */ 789 !(config.exe == EX_WIN32) && !(funcsym_p.Sfunc.Fflags3 & Fnothrow) || 790 cgstate.accessedTLS || 791 sv64 792 ) 793 needframe = 1; 794 795 CodeBuilder cdbx; cdbx.ctor(); 796 797 Lagain: 798 spoff = 0; 799 char guessneedframe = needframe; 800 int cfa_offset = 0; 801 // if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_SOLARIS) && !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru))) 802 // usednteh |= NTEHpassthru; 803 804 /* Compute BP offsets for variables on stack. 805 * The organization is: 806 * Para.size parameters 807 * -------- stack is aligned to STACKALIGN 808 * seg of return addr (if far function) 809 * IP of return addr 810 * BP. caller's BP 811 * DS (if Windows prolog/epilog) 812 * exception handling context symbol 813 * Fast.size fastpar 814 * Auto.size autos and regs 815 * regsave.off any saved registers 816 * Foff floating register 817 * Alloca.size alloca temporary 818 * CSoff common subs 819 * NDPoff any 8087 saved registers 820 * monitor context record 821 * any saved registers 822 */ 823 824 if (tym == TYifunc) 825 Para.size = 26; // how is this number derived? 826 else 827 { 828 version (FRAMEPTR) 829 { 830 Para.size = ((farfunc ? 2 : 1) + needframe) * REGSIZE; 831 if (needframe) 832 EBPtoESP = -REGSIZE; 833 } 834 else 835 Para.size = ((farfunc ? 
2 : 1) + 1) * REGSIZE; 836 } 837 838 /* The real reason for the FAST section is because the implementation of contracts 839 * requires a consistent stack frame location for the 'this' pointer. But if varying 840 * stuff in Auto.offset causes different alignment for that section, the entire block can 841 * shift around, causing a crash in the contracts. 842 * Fortunately, the 'this' is always an SCfastpar, so we put the fastpar's in their 843 * own FAST section, which is never aligned at a size bigger than REGSIZE, and so 844 * its alignment never shifts around. 845 * But more work needs to be done, see Bugzilla 9200. Really, each section should be aligned 846 * individually rather than as a group. 847 */ 848 Fast.size = 0; 849 static if (NTEXCEPTIONS == 2) 850 { 851 Fast.size -= nteh_contextsym_size(); 852 version (MARS) 853 { 854 if (config.exe & EX_windos) 855 { 856 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh && nteh_contextsym_size() == 0) 857 Fast.size -= 5 * 4; 858 } 859 } 860 } 861 862 /* Despite what the comment above says, aligning Fast section to size greater 863 * than REGSIZE does not break contract implementation. Fast.offset and 864 * Fast.alignment must be the same for the overriding and 865 * the overridden function, since they have the same parameters. Fast.size 866 * must be the same because otherwise, contract inheritance wouldn't work 867 * even if we didn't align Fast section to size greater than REGSIZE. Therefore, 868 * the only way aligning the section could cause problems with contract 869 * inheritance is if bias (declared below) differed for the overridden 870 * and the overriding function. 871 * 872 * Bias depends on Para.size and needframe. The value of Para.size depends on 873 * whether the function is an interrupt handler and whether it is a farfunc. 874 * DMD does not have _interrupt attribute and D does not make a distinction 875 * between near and far functions, so Para.size should always be 2 * REGSIZE 876 * for D. 
877 * 878 * The value of needframe depends on a global setting that is only set 879 * during backend's initialization and on function flag Ffakeeh. On Windows, 880 * that flag is always set for virtual functions, for which contracts are 881 * defined and on other platforms, it is never set. Because of that 882 * the value of neadframe should always be the same for the overridden 883 * and the overriding function, and so bias should be the same too. 884 */ 885 886 version (FRAMEPTR) 887 int bias = enforcealign ? 0 : cast(int)(Para.size); 888 else 889 int bias = enforcealign ? 0 : cast(int)(Para.size + (needframe ? 0 : REGSIZE)); 890 891 if (Fast.alignment < REGSIZE) 892 Fast.alignment = REGSIZE; 893 894 Fast.size = alignsection(Fast.size - Fast.offset, Fast.alignment, bias); 895 896 if (Auto.alignment < REGSIZE) 897 Auto.alignment = REGSIZE; // necessary because localsize must be REGSIZE aligned 898 Auto.size = alignsection(Fast.size - Auto.offset, Auto.alignment, bias); 899 900 regsave.off = alignsection(Auto.size - regsave.top, regsave.alignment, bias); 901 //printf("regsave.off = x%x, size = x%x, alignment = %x\n", 902 //cast(int)regsave.off, cast(int)(regsave.top), cast(int)regsave.alignment); 903 904 if (floatreg) 905 { 906 uint floatregsize = config.fpxmmregs || I32 ? 
16 : DOUBLESIZE; 907 Foff = alignsection(regsave.off - floatregsize, STACKALIGN, bias); 908 //printf("Foff = x%x, size = x%x\n", cast(int)Foff, cast(int)floatregsize); 909 } 910 else 911 Foff = regsave.off; 912 913 Alloca.alignment = REGSIZE; 914 Alloca.offset = alignsection(Foff - Alloca.size, Alloca.alignment, bias); 915 916 CSoff = alignsection(Alloca.offset - CSE.size(), CSE.alignment(), bias); 917 //printf("CSoff = x%x, size = x%x, alignment = %x\n", 918 //cast(int)CSoff, CSE.size(), cast(int)CSE.alignment); 919 920 NDPoff = alignsection(CSoff - global87.save.length * tysize(TYldouble), REGSIZE, bias); 921 922 regm_t topush = fregsaved & ~mfuncreg; // mask of registers that need saving 923 pushoffuse = false; 924 pushoff = NDPoff; 925 /* We don't keep track of all the pushes and pops in a function. Hence, 926 * using POP REG to restore registers in the epilog doesn't work, because the Dwarf unwinder 927 * won't be setting ESP correctly. With pushoffuse, the registers are restored 928 * from EBP, which is kept track of properly. 929 */ 930 if ((config.flags4 & CFG4speed || config.ehmethod == EHmethod.EH_DWARF) && (I32 || I64)) 931 { 932 /* Instead of pushing the registers onto the stack one by one, 933 * allocate space in the stack frame and copy/restore them there. 934 */ 935 int xmmtopush = numbitsset(topush & XMMREGS); // XMM regs take 16 bytes 936 int gptopush = numbitsset(topush) - xmmtopush; // general purpose registers to save 937 if (NDPoff || xmmtopush || cgstate.funcarg.size) 938 { 939 pushoff = alignsection(pushoff - (gptopush * REGSIZE + xmmtopush * 16), 940 xmmtopush ? STACKALIGN : REGSIZE, bias); 941 pushoffuse = true; // tell others we're using this strategy 942 } 943 } 944 945 //printf("Fast.size = x%x, Auto.size = x%x\n", (int)Fast.size, (int)Auto.size); 946 947 cgstate.funcarg.alignment = STACKALIGN; 948 /* If the function doesn't need the extra alignment, don't do it. 
949 * Can expand on this by allowing for locals that don't need extra alignment 950 * and calling functions that don't need it. 951 */ 952 if (pushoff == 0 && !calledafunc && config.fpxmmregs && (I32 || I64)) 953 { 954 cgstate.funcarg.alignment = I64 ? 8 : 4; 955 } 956 957 //printf("pushoff = %d, size = %d, alignment = %d, bias = %d\n", cast(int)pushoff, cast(int)cgstate.funcarg.size, cast(int)cgstate.funcarg.alignment, cast(int)bias); 958 cgstate.funcarg.offset = alignsection(pushoff - cgstate.funcarg.size, cgstate.funcarg.alignment, bias); 959 960 localsize = -cgstate.funcarg.offset; 961 962 //printf("Alloca.offset = x%llx, cstop = x%llx, CSoff = x%llx, NDPoff = x%llx, localsize = x%llx\n", 963 //(long long)Alloca.offset, (long long)CSE.size(), (long long)CSoff, (long long)NDPoff, (long long)localsize); 964 assert(cast(targ_ptrdiff_t)localsize >= 0); 965 966 // Keep the stack aligned by 8 for any subsequent function calls 967 if (!I16 && calledafunc && 968 (STACKALIGN >= 16 || config.flags4 & CFG4stackalign)) 969 { 970 int npush = numbitsset(topush); // number of registers that need saving 971 npush += numbitsset(topush & XMMREGS); // XMM regs take 16 bytes, so count them twice 972 if (pushoffuse) 973 npush = 0; 974 975 //printf("npush = %d Para.size = x%x needframe = %d localsize = x%x\n", 976 //npush, Para.size, needframe, localsize); 977 978 int sz = cast(int)(localsize + npush * REGSIZE); 979 if (!enforcealign) 980 { 981 version (FRAMEPTR) 982 sz += Para.size; 983 else 984 sz += Para.size + (needframe ? 
0 : -REGSIZE); 985 } 986 if (sz & (STACKALIGN - 1)) 987 localsize += STACKALIGN - (sz & (STACKALIGN - 1)); 988 } 989 cgstate.funcarg.offset = -localsize; 990 991 //printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x localsize x%02x\n", 992 //(int)Foff,(int)Auto.size,(int)NDPoff,(int)CSoff,(int)Para.size,(int)localsize); 993 994 uint xlocalsize = cast(uint)localsize; // amount to subtract from ESP to make room for locals 995 996 if (tyf & mTYnaked) // if no prolog/epilog for function 997 { 998 hasframe = 1; 999 return; 1000 } 1001 1002 if (tym == TYifunc) 1003 { 1004 prolog_ifunc(cdbx,&tyf); 1005 hasframe = 1; 1006 cdb.append(cdbx); 1007 goto Lcont; 1008 } 1009 1010 /* Determine if we need BP set up */ 1011 if (enforcealign) 1012 { 1013 // we need BP to reset the stack before return 1014 // otherwise the return address is lost 1015 needframe = 1; 1016 1017 } 1018 else if (config.flags & CFGalwaysframe) 1019 needframe = 1; 1020 else 1021 { 1022 if (localsize) 1023 { 1024 if (I16 || 1025 !(config.flags4 & CFG4speed) || 1026 config.target_cpu < TARGET_Pentium || 1027 farfunc || 1028 config.flags & CFGstack || 1029 xlocalsize >= 0x1000 || 1030 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) || 1031 anyiasm || 1032 Alloca.size 1033 ) 1034 needframe = 1; 1035 } 1036 if (refparam && (anyiasm || I16)) 1037 needframe = 1; 1038 } 1039 1040 if (needframe) 1041 { 1042 assert(mfuncreg & mBP); // shouldn't have used mBP 1043 1044 if (!guessneedframe) // if guessed wrong 1045 goto Lagain; 1046 } 1047 1048 if (I16 && config.wflags & WFwindows && farfunc) 1049 { 1050 prolog_16bit_windows_farfunc(cdbx, &tyf, &pushds); 1051 enter = false; // don't use ENTER instruction 1052 hasframe = 1; // we have a stack frame 1053 } 1054 else if (needframe) // if variables or parameters 1055 { 1056 prolog_frame(cdbx, farfunc, xlocalsize, enter, cfa_offset); 1057 hasframe = 1; 1058 } 1059 1060 /* Align the stack if necessary */ 1061 
prolog_stackalign(cdbx); 1062 1063 /* Subtract from stack pointer the size of the local stack frame 1064 */ 1065 if (config.flags & CFGstack) // if stack overflow check 1066 { 1067 prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc); 1068 if (Alloca.size) 1069 prolog_setupalloca(cdbx); 1070 } 1071 else if (needframe) /* if variables or parameters */ 1072 { 1073 if (xlocalsize) /* if any stack offset */ 1074 { 1075 prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc); 1076 if (Alloca.size) 1077 prolog_setupalloca(cdbx); 1078 } 1079 else 1080 assert(Alloca.size == 0); 1081 } 1082 else if (xlocalsize) 1083 { 1084 assert(I32 || I64); 1085 prolog_frameadj2(cdbx, tyf, xlocalsize, &pushalloc); 1086 version (FRAMEPTR) { } else 1087 BPoff += REGSIZE; 1088 } 1089 else 1090 assert((localsize | Alloca.size) == 0 || (usednteh & NTEHjmonitor)); 1091 EBPtoESP += xlocalsize; 1092 if (hasframe) 1093 EBPtoESP += REGSIZE; 1094 1095 /* Win64 unwind needs the amount of code generated so far 1096 */ 1097 if (config.exe == EX_WIN64) 1098 { 1099 code *c = cdbx.peek(); 1100 pinholeopt(c, null); 1101 prolog_allocoffset = calcblksize(c); 1102 } 1103 1104 version (SCPP) 1105 { 1106 /* The idea is to generate trace for all functions if -Nc is not thrown. 1107 * If -Nc is thrown, generate trace only for global COMDATs, because those 1108 * are relevant to the FUNCTIONS statement in the linker .DEF file. 1109 * This same logic should be in epilog(). 1110 */ 1111 if (config.flags & CFGtrace && 1112 (!(config.flags4 & CFG4allcomdat) || 1113 funcsym_p.Sclass == SCcomdat || 1114 funcsym_p.Sclass == SCglobal || 1115 (config.flags2 & CFG2comdat && SymInline(funcsym_p)) 1116 ) 1117 ) 1118 { 1119 uint spalign = 0; 1120 int sz = cast(int)localsize; 1121 if (!enforcealign) 1122 { 1123 version (FRAMEPTR) 1124 sz += Para.size; 1125 else 1126 sz += Para.size + (needframe ? 
0 : -REGSIZE); 1127 } 1128 if (STACKALIGN >= 16 && (sz & (STACKALIGN - 1))) 1129 spalign = STACKALIGN - (sz & (STACKALIGN - 1)); 1130 1131 if (spalign) 1132 { /* This could be avoided by moving the function call to after the 1133 * registers are saved. But I don't remember why the call is here 1134 * and not there. 1135 */ 1136 cod3_stackadj(cdbx, spalign); 1137 } 1138 1139 uint regsaved; 1140 prolog_trace(cdbx, farfunc, ®saved); 1141 1142 if (spalign) 1143 cod3_stackadj(cdbx, -spalign); 1144 useregs((ALLREGS | mBP | mES) & ~regsaved); 1145 } 1146 } 1147 1148 version (MARS) 1149 { 1150 if (usednteh & NTEHjmonitor) 1151 { Symbol *sthis; 1152 1153 for (SYMIDX si = 0; 1; si++) 1154 { assert(si < globsym.length); 1155 sthis = globsym[si]; 1156 if (strcmp(sthis.Sident.ptr,"this".ptr) == 0) 1157 break; 1158 } 1159 nteh_monitor_prolog(cdbx,sthis); 1160 EBPtoESP += 3 * 4; 1161 } 1162 } 1163 1164 cdb.append(cdbx); 1165 prolog_saveregs(cdb, topush, cfa_offset); 1166 1167 Lcont: 1168 1169 if (config.exe == EX_WIN64) 1170 { 1171 if (variadic(funcsym_p.Stype)) 1172 prolog_gen_win64_varargs(cdb); 1173 regm_t namedargs; 1174 prolog_loadparams(cdb, tyf, pushalloc, namedargs); 1175 return; 1176 } 1177 1178 prolog_ifunc2(cdb, tyf, tym, pushds); 1179 1180 static if (NTEXCEPTIONS == 2) 1181 { 1182 if (usednteh & NTEH_except) 1183 nteh_setsp(cdb, 0x89); // MOV __context[EBP].esp,ESP 1184 } 1185 1186 // Load register parameters off of the stack. Do not use 1187 // assignaddr(), as it will replace the stack reference with 1188 // the register! 1189 regm_t namedargs; 1190 prolog_loadparams(cdb, tyf, pushalloc, namedargs); 1191 1192 if (sv64) 1193 prolog_genvarargs(cdb, sv64, namedargs); 1194 1195 /* Alignment checks 1196 */ 1197 //assert(Auto.alignment <= STACKALIGN); 1198 //assert(((Auto.size + Para.size + BPoff) & (Auto.alignment - 1)) == 0); 1199 } 1200 1201 /************************************ 1202 * Predicate for sorting auto symbols for qsort(). 
1203 * Returns: 1204 * < 0 s1 goes farther from frame pointer 1205 * > 0 s1 goes nearer the frame pointer 1206 * = 0 no difference 1207 */ 1208 1209 extern (C) int 1210 autosort_cmp(scope const void *ps1, scope const void *ps2) 1211 { 1212 Symbol *s1 = *cast(Symbol **)ps1; 1213 Symbol *s2 = *cast(Symbol **)ps2; 1214 1215 /* Largest align size goes furthest away from frame pointer, 1216 * so they get allocated first. 1217 */ 1218 uint alignsize1 = Symbol_Salignsize(s1); 1219 uint alignsize2 = Symbol_Salignsize(s2); 1220 if (alignsize1 < alignsize2) 1221 return 1; 1222 else if (alignsize1 > alignsize2) 1223 return -1; 1224 1225 /* move variables nearer the frame pointer that have higher Sweights 1226 * because addressing mode is fewer bytes. Grouping together high Sweight 1227 * variables also may put them in the same cache 1228 */ 1229 if (s1.Sweight < s2.Sweight) 1230 return -1; 1231 else if (s1.Sweight > s2.Sweight) 1232 return 1; 1233 1234 /* More: 1235 * 1. put static arrays nearest the frame pointer, so buffer overflows 1236 * can't change other variable contents 1237 * 2. Do the coloring at the byte level to minimize stack usage 1238 */ 1239 return 0; 1240 } 1241 1242 /****************************** 1243 * Compute stack frame offsets for local variables. 1244 * that did not make it into registers. 
1245 * Params: 1246 * symtab = function's symbol table 1247 * estimate = true for do estimate only, false for final 1248 */ 1249 void stackoffsets(ref symtab_t symtab, bool estimate) 1250 { 1251 //printf("stackoffsets() %s\n", funcsym_p.Sident.ptr); 1252 1253 Para.init(); // parameter offset 1254 Fast.init(); // SCfastpar offset 1255 Auto.init(); // automatic & register offset 1256 EEStack.init(); // for SCstack's 1257 1258 // Set if doing optimization of auto layout 1259 bool doAutoOpt = estimate && config.flags4 & CFG4optimized; 1260 1261 // Put autos in another array so we can do optimizations on the stack layout 1262 Symbol*[10] autotmp = void; 1263 Symbol **autos = null; 1264 if (doAutoOpt) 1265 { 1266 if (symtab.length <= autotmp.length) 1267 autos = autotmp.ptr; 1268 else 1269 { autos = cast(Symbol **)malloc(symtab.length * (*autos).sizeof); 1270 assert(autos); 1271 } 1272 } 1273 size_t autosi = 0; // number used in autos[] 1274 1275 for (int si = 0; si < symtab.length; si++) 1276 { Symbol *s = symtab[si]; 1277 1278 /* Don't allocate space for dead or zero size parameters 1279 */ 1280 switch (s.Sclass) 1281 { 1282 case SCfastpar: 1283 if (!(funcsym_p.Sfunc.Fflags3 & Ffakeeh)) 1284 goto Ldefault; // don't need consistent stack frame 1285 break; 1286 1287 case SCparameter: 1288 if (type_zeroSize(s.Stype, tybasic(funcsym_p.Stype.Tty))) 1289 { 1290 Para.offset = _align(REGSIZE,Para.offset); // align on word stack boundary 1291 s.Soffset = Para.offset; 1292 continue; 1293 } 1294 break; // allocate even if it's dead 1295 1296 case SCshadowreg: 1297 break; // allocate even if it's dead 1298 1299 default: 1300 Ldefault: 1301 if (Symbol_Sisdead(s, anyiasm)) 1302 continue; // don't allocate space 1303 break; 1304 } 1305 1306 targ_size_t sz = type_size(s.Stype); 1307 if (sz == 0) 1308 sz++; // can't handle 0 length structs 1309 1310 uint alignsize = Symbol_Salignsize(s); 1311 if (alignsize > STACKALIGN) 1312 alignsize = STACKALIGN; // no point if the stack is less 
aligned 1313 1314 //printf("symbol '%s', size = %d, alignsize = %d, read = %x\n",s.Sident.ptr, cast(int)sz, cast(int)alignsize, s.Sflags & SFLread); 1315 assert(cast(int)sz >= 0); 1316 1317 switch (s.Sclass) 1318 { 1319 case SCfastpar: 1320 /* Get these 1321 * right next to the stack frame pointer, EBP. 1322 * Needed so we can call nested contract functions 1323 * frequire and fensure. 1324 */ 1325 if (s.Sfl == FLreg) // if allocated in register 1326 continue; 1327 /* Needed because storing fastpar's on the stack in prolog() 1328 * does the entire register 1329 */ 1330 if (sz < REGSIZE) 1331 sz = REGSIZE; 1332 1333 Fast.offset = _align(sz,Fast.offset); 1334 s.Soffset = Fast.offset; 1335 Fast.offset += sz; 1336 //printf("fastpar '%s' sz = %d, fast offset = x%x, %p\n",s.Sident,(int)sz,(int)s.Soffset, s); 1337 1338 if (alignsize > Fast.alignment) 1339 Fast.alignment = alignsize; 1340 break; 1341 1342 case SCregister: 1343 case SCauto: 1344 if (s.Sfl == FLreg) // if allocated in register 1345 break; 1346 1347 if (doAutoOpt) 1348 { autos[autosi++] = s; // deal with later 1349 break; 1350 } 1351 1352 Auto.offset = _align(sz,Auto.offset); 1353 s.Soffset = Auto.offset; 1354 Auto.offset += sz; 1355 //printf("auto '%s' sz = %d, auto offset = x%lx\n",s.Sident,sz,(long)s.Soffset); 1356 1357 if (alignsize > Auto.alignment) 1358 Auto.alignment = alignsize; 1359 break; 1360 1361 case SCstack: 1362 EEStack.offset = _align(sz,EEStack.offset); 1363 s.Soffset = EEStack.offset; 1364 //printf("EEStack.offset = x%lx\n",(long)s.Soffset); 1365 EEStack.offset += sz; 1366 break; 1367 1368 case SCshadowreg: 1369 case SCparameter: 1370 if (config.exe == EX_WIN64) 1371 { 1372 assert((Para.offset & 7) == 0); 1373 s.Soffset = Para.offset; 1374 Para.offset += 8; 1375 break; 1376 } 1377 /* Alignment on OSX 32 is odd. reals are 16 byte aligned in general, 1378 * but are 4 byte aligned on the OSX 32 stack. 
1379 */ 1380 Para.offset = _align(REGSIZE,Para.offset); /* align on word stack boundary */ 1381 if (alignsize >= 16 && 1382 (I64 || (config.exe == EX_OSX && 1383 (tyaggregate(s.ty()) || tyvector(s.ty()))))) 1384 Para.offset = (Para.offset + (alignsize - 1)) & ~(alignsize - 1); 1385 s.Soffset = Para.offset; 1386 //printf("%s param offset = x%lx, alignsize = %d\n",s.Sident,(long)s.Soffset, (int)alignsize); 1387 Para.offset += (s.Sflags & SFLdouble) 1388 ? type_size(tstypes[TYdouble]) // float passed as double 1389 : type_size(s.Stype); 1390 break; 1391 1392 case SCpseudo: 1393 case SCstatic: 1394 case SCbprel: 1395 break; 1396 default: 1397 symbol_print(s); 1398 assert(0); 1399 } 1400 } 1401 1402 if (autosi) 1403 { 1404 qsort(autos, autosi, (Symbol *).sizeof, &autosort_cmp); 1405 1406 vec_t tbl = vec_calloc(autosi); 1407 1408 for (size_t si = 0; si < autosi; si++) 1409 { 1410 Symbol *s = autos[si]; 1411 1412 targ_size_t sz = type_size(s.Stype); 1413 if (sz == 0) 1414 sz++; // can't handle 0 length structs 1415 1416 uint alignsize = Symbol_Salignsize(s); 1417 if (alignsize > STACKALIGN) 1418 alignsize = STACKALIGN; // no point if the stack is less aligned 1419 1420 /* See if we can share storage with another variable 1421 * if their live ranges do not overlap. 
1422 */ 1423 if (// Don't share because could stomp on variables 1424 // used in finally blocks 1425 !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) && 1426 s.Srange && !(s.Sflags & SFLspill)) 1427 { 1428 for (size_t i = 0; i < si; i++) 1429 { 1430 if (!vec_testbit(i,tbl)) 1431 continue; 1432 Symbol *sp = autos[i]; 1433 //printf("auto s = '%s', sp = '%s', %d, %d, %d\n",s.Sident,sp.Sident,dfo.length,vec_numbits(s.Srange),vec_numbits(sp.Srange)); 1434 if (vec_disjoint(s.Srange,sp.Srange) && 1435 !(sp.Soffset & (alignsize - 1)) && 1436 sz <= type_size(sp.Stype)) 1437 { 1438 vec_or(sp.Srange,sp.Srange,s.Srange); 1439 //printf("sharing space - '%s' onto '%s'\n",s.Sident,sp.Sident); 1440 s.Soffset = sp.Soffset; 1441 goto L2; 1442 } 1443 } 1444 } 1445 Auto.offset = _align(sz,Auto.offset); 1446 s.Soffset = Auto.offset; 1447 //printf("auto '%s' sz = %d, auto offset = x%lx\n",s.Sident,sz,(long)s.Soffset); 1448 Auto.offset += sz; 1449 if (s.Srange && !(s.Sflags & SFLspill)) 1450 vec_setbit(si,tbl); 1451 1452 if (alignsize > Auto.alignment) 1453 Auto.alignment = alignsize; 1454 L2: { } 1455 } 1456 1457 vec_free(tbl); 1458 1459 if (autos != autotmp.ptr) 1460 free(autos); 1461 } 1462 } 1463 1464 /**************************** 1465 * Generate code for a block. 1466 */ 1467 1468 private void blcodgen(block *bl) 1469 { 1470 regm_t mfuncregsave = mfuncreg; 1471 1472 //dbg_printf("blcodgen(%p)\n",bl); 1473 1474 /* Determine existing immediate values in registers by ANDing 1475 together the values from all the predecessors of b. 
1476 */ 1477 assert(bl.Bregcon.immed.mval == 0); 1478 regcon.immed.mval = 0; // assume no previous contents in registers 1479 // regcon.cse.mval = 0; 1480 foreach (bpl; ListRange(bl.Bpred)) 1481 { 1482 block *bp = list_block(bpl); 1483 1484 if (bpl == bl.Bpred) 1485 { regcon.immed = bp.Bregcon.immed; 1486 regcon.params = bp.Bregcon.params; 1487 // regcon.cse = bp.Bregcon.cse; 1488 } 1489 else 1490 { 1491 int i; 1492 1493 regcon.params &= bp.Bregcon.params; 1494 if ((regcon.immed.mval &= bp.Bregcon.immed.mval) != 0) 1495 // Actual values must match, too 1496 for (i = 0; i < REGMAX; i++) 1497 { 1498 if (regcon.immed.value[i] != bp.Bregcon.immed.value[i]) 1499 regcon.immed.mval &= ~mask(i); 1500 } 1501 } 1502 } 1503 regcon.cse.mops &= regcon.cse.mval; 1504 1505 // Set regcon.mvar according to what variables are in registers for this block 1506 CodeBuilder cdb; cdb.ctor(); 1507 regcon.mvar = 0; 1508 regcon.mpvar = 0; 1509 regcon.indexregs = 1; 1510 int anyspill = 0; 1511 char *sflsave = null; 1512 if (config.flags4 & CFG4optimized) 1513 { 1514 CodeBuilder cdbload; cdbload.ctor(); 1515 CodeBuilder cdbstore; cdbstore.ctor(); 1516 1517 sflsave = cast(char *) alloca(globsym.length * char.sizeof); 1518 for (SYMIDX i = 0; i < globsym.length; i++) 1519 { 1520 Symbol *s = globsym[i]; 1521 1522 sflsave[i] = s.Sfl; 1523 if (regParamInPreg(s) && 1524 regcon.params & s.Spregm() && 1525 vec_testbit(dfoidx,s.Srange)) 1526 { 1527 // regcon.used |= s.Spregm(); 1528 } 1529 1530 if (s.Sfl == FLreg) 1531 { 1532 if (vec_testbit(dfoidx,s.Srange)) 1533 { 1534 regcon.mvar |= s.Sregm; 1535 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 1536 regcon.mpvar |= s.Sregm; 1537 } 1538 } 1539 else if (s.Sflags & SFLspill) 1540 { 1541 if (vec_testbit(dfoidx,s.Srange)) 1542 { 1543 anyspill = cast(int)(i + 1); 1544 cgreg_spillreg_prolog(bl,s,cdbstore,cdbload); 1545 if (vec_testbit(dfoidx,s.Slvreg)) 1546 { 1547 s.Sfl = FLreg; 1548 regcon.mvar |= s.Sregm; 1549 regcon.cse.mval &= ~s.Sregm; 1550 
regcon.immed.mval &= ~s.Sregm; 1551 regcon.params &= ~s.Sregm; 1552 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 1553 regcon.mpvar |= s.Sregm; 1554 } 1555 } 1556 } 1557 } 1558 if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops) 1559 { 1560 cse_save(cdb,regcon.cse.mops & ~regcon.cse.mval); 1561 } 1562 cdb.append(cdbstore); 1563 cdb.append(cdbload); 1564 mfuncreg &= ~regcon.mvar; // use these registers 1565 regcon.used |= regcon.mvar; 1566 1567 // Determine if we have more than 1 uncommitted index register 1568 regcon.indexregs = IDXREGS & ~regcon.mvar; 1569 regcon.indexregs &= regcon.indexregs - 1; 1570 } 1571 1572 /* This doesn't work when calling the BC_finally function, 1573 * as it is one block calling another. 1574 */ 1575 //regsave.idx = 0; 1576 1577 reflocal = 0; 1578 int refparamsave = refparam; 1579 refparam = 0; 1580 assert((regcon.cse.mops & regcon.cse.mval) == regcon.cse.mops); 1581 1582 outblkexitcode(cdb, bl, anyspill, sflsave, &retsym, mfuncregsave); 1583 bl.Bcode = cdb.finish(); 1584 1585 for (int i = 0; i < anyspill; i++) 1586 { 1587 Symbol *s = globsym[i]; 1588 s.Sfl = sflsave[i]; // undo block register assignments 1589 } 1590 1591 if (reflocal) 1592 bl.Bflags |= BFLreflocal; 1593 if (refparam) 1594 bl.Bflags |= BFLrefparam; 1595 refparam |= refparamsave; 1596 bl.Bregcon.immed = regcon.immed; 1597 bl.Bregcon.cse = regcon.cse; 1598 bl.Bregcon.used = regcon.used; 1599 bl.Bregcon.params = regcon.params; 1600 1601 debug 1602 debugw && printf("code gen complete\n"); 1603 } 1604 1605 /***************************************** 1606 * Add in exception handling code. 
 *
 * Does nothing unless usednteh has EHtry or EHcleanup set. Otherwise it
 * assigns Bindex/Bendindex to every block and processes the ESCAPE
 * pseudo-instructions in each block's code; for EX_WIN32 it also inserts
 * the code produced by nteh_gensindex().
 */

version (SCPP)
{

private void cgcod_eh()
{
    list_t stack;       // indices saved by ESCmark, consumed by ESCrelease
    int idx;
    int tryidx;         // except_index_get() taken right after the latest BCtry push

    if (!(usednteh & (EHtry | EHcleanup)))
        return;

    // Compute Bindex for each block
    for (block *b = startblock; b; b = b.Bnext)
    {
        b.Bindex = -1;
        b.Bflags &= ~BFLvisited;               /* mark as unvisited    */
    }
    block *btry = null;
    int lastidx = 0;
    startblock.Bindex = 0;
    for (block *b = startblock; b; b = b.Bnext)
    {
        if (btry == b.Btry && b.BC == BCcatch)  // if don't need to pop try block
        {
            block *br = list_block(b.Bpred);          // find corresponding try block
            assert(br.BC == BCtry);
            b.Bindex = br.Bindex;
        }
        else if (btry != b.Btry && b.BC != BCcatch ||
                 !(b.Bflags & BFLvisited))
            b.Bindex = lastidx;
        b.Bflags |= BFLvisited;

        debug
        if (debuge)
        {
            WRBC(b.BC);
            printf(" block (%p) Btry=%p Bindex=%d\n",b,b.Btry,b.Bindex);
        }

        except_index_set(b.Bindex);
        if (btry != b.Btry)                    // exited previous try block
        {
            except_pop(b,null,btry);
            btry = b.Btry;
        }
        if (b.BC == BCtry)
        {
            except_push(b,null,b);
            btry = b;
            tryidx = except_index_get();
            // put the index-setting code in front of the block's existing code
            CodeBuilder cdb; cdb.ctor();
            nteh_gensindex(cdb,tryidx - 1);
            cdb.append(b.Bcode);
            b.Bcode = cdb.finish();
        }

        stack = null;
        for (code *c = b.Bcode; c; c = code_next(c))
        {
            if ((c.Iop & ESCAPEmask) == ESCAPE)
            {
                code *c1 = null;
                switch (c.Iop & 0xFFFF00)
                {
                    case ESCctor:
                        //printf("ESCctor\n");
                        except_push(c,c.IEV1.Vtor,null);
                        goto L1;

                    case ESCdtor:
                        //printf("ESCdtor\n");
                        except_pop(c,c.IEV1.Vtor,null);
                    L1: if (config.exe == EX_WIN32)
                        {
                            // splice the generated code in right after c
                            CodeBuilder cdb; cdb.ctor();
                            nteh_gensindex(cdb,except_index_get() - 1);
                            c1 = cdb.finish();
                            c1.next = code_next(c);
                            c.next = c1;
                        }
                        break;

                    case ESCmark:
                        //printf("ESCmark\n");
                        idx = except_index_get();
                        list_prependdata(&stack,idx);
                        except_mark();
                        break;

                    case ESCrelease:
                        //printf("ESCrelease\n");
                        version (SCPP)
                        {
                            idx = list_data(stack);
                            list_pop(&stack);
                            if (idx != except_index_get())
                            {
                                if (config.exe == EX_WIN32)
                                {
                                    CodeBuilder cdb; cdb.ctor();
                                    nteh_gensindex(cdb,idx - 1);
                                    c1 = cdb.finish();
                                    c1.next = code_next(c);
                                    c.next = c1;
                                }
                                else
                                {   except_pair_append(c,idx - 1);
                                    c.Iop = ESCAPE | ESCoffset;
                                }
                            }
                            except_release();
                        }
                        break;

                    case ESCmark2:
                        //printf("ESCmark2\n");
                        except_mark();
                        break;

                    case ESCrelease2:
                        //printf("ESCrelease2\n");
                        version (SCPP)
                        {
                            except_release();
                        }
                        break;

                    default:
                        break;
                }
            }
        }
        assert(stack == null);          // every ESCmark was matched by an ESCrelease
        b.Bendindex = except_index_get();

        if (b.BC != BCret && b.BC != BCretexp)
            lastidx = b.Bendindex;

        // Set starting index for each of the successors
        int i = 0;
        foreach (bl; ListRange(b.Bsucc))
        {
            block *bs = list_block(bl);
            if (b.BC == BCtry)
            {
                switch (i)
                {
                    case 0:                             // block after catches
                        bs.Bindex = b.Bendindex;
                        break;

                    case 1:                             // 1st catch block
                        bs.Bindex = tryidx;
                        break;

                    default:                            // subsequent catch blocks
                        bs.Bindex = b.Bindex;
                        break;
                }

                debug
                if (debuge)
                {
                    printf(" 1setting %p to %d\n",bs,bs.Bindex);
                }
            }
            else if (!(bs.Bflags & BFLvisited))
            {
                bs.Bindex = b.Bendindex;

                debug
                if (debuge)
                {
                    printf(" 2setting %p to %d\n",bs,bs.Bindex);
                }
            }
            bs.Bflags |= BFLvisited;
            i++;
        }
    }

    /* For EX_WIN32: any non-try block entered from a predecessor whose
     * Bendindex differs from the block's Bindex gets index-setting code
     * prepended (once per block, hence the break).
     */
    if (config.exe == EX_WIN32)
        for (block *b = startblock; b; b = b.Bnext)
        {
            if (/*!b.Bcount ||*/ b.BC == BCtry)
                continue;
            foreach (bl; ListRange(b.Bpred))
            {
                int pi = list_block(bl).Bendindex;
                if (b.Bindex != pi)
                {
                    CodeBuilder cdb; cdb.ctor();
                    nteh_gensindex(cdb,b.Bindex - 1);
                    cdb.append(b.Bcode);
                    b.Bcode = cdb.finish();
                    break;
                }
            }
        }
}

}

/******************************
 * Count the number of bits set in a register mask.
 * Params:
 *      regm = register mask
 * Returns:
 *      number of 1 bits in regm (0 for an empty mask)
 */

int numbitsset(regm_t regm)
{
    int n = 0;
    if (regm)
        do
            n++;
        while ((regm &= regm - 1) != 0);    // regm & (regm - 1) clears the lowest set bit
    return n;
}

/******************************
 * Given a register mask, find and return the number
 * of the first register that fits.
 * This overload forwards to the one below, passing this file's own
 * __LINE__ and __FILE__.
 */

reg_t findreg(regm_t regm)
{
    return findreg(regm, __LINE__, __FILE__);
}

/******************************
 * Find the index of the lowest set bit in regm.
 * Params:
 *      regm = register mask, must not be 0
 *      line = line number, printed in the debug diagnostic
 *      file = file name, printed in the debug diagnostic
 * Returns:
 *      index of the lowest set bit; asserts if regm is 0
 */
reg_t findreg(regm_t regm, int line, const(char)* file)
{
    debug
    regm_t regmsave = regm;

    reg_t i = 0;
    while (1)
    {
        if (!(regm & 0xF))      // low 4 bits all clear: skip them in one step
        {
            regm >>= 4;
            i += 4;
            if (!regm)          // nothing left, so regm was 0 to begin with
                break;
        }
        if (regm & 1)
            return i;
        regm >>= 1;
        i++;
    }

    debug
    printf("findreg(%s, line=%d, file='%s', function = '%s')\n",regm_str(regmsave),line,file,funcsym_p.Sident.ptr);
    fflush(stdout);

//    *(char*)0=0;
    assert(0);
}

/***************
 * Free element (but not its leaves! (assume they are already freed))
 * Don't decrement Ecount!  This is so we can detect if the common subexp
 * has already been evaluated.
 * If common subexpression is not required anymore, eliminate
 * references to it.
 * Params:
 *      e = element to free
 */

void freenode(elem *e)
{
    elem_debug(e);
    //dbg_printf("freenode(%p) : comsub = %d, count = %d\n",e,e.Ecomsub,e.Ecount);
    // Ecomsub is decremented in every case; we only go on when it was 0 beforehand
    if (e.Ecomsub--) return;             /* usage count                  */
    if (e.Ecount)                        /* if it was a CSE              */
    {
        for (size_t i = 0; i < regcon.cse.value.length; i++)
        {
            if (regcon.cse.value[i] == e)       /* if a register is holding it  */
            {
                regcon.cse.mval &= ~mask(cast(uint)i);
                regcon.cse.mops &= ~mask(cast(uint)i);        /* free masks                   */
            }
        }
        CSE.remove(e);
    }
}

/*********************************
 * Reset Ecomsub for all elem nodes, i.e. reverse the effects of freenode().
 * Sets Ecomsub = Ecount on e and on every node below it, recursing on E2
 * of binary nodes and iterating down E1.
 * Params:
 *      e = root of the elem tree
 */

private void resetEcomsub(elem *e)
{
    while (1)
    {
        elem_debug(e);
        e.Ecomsub = e.Ecount;
        const op = e.Eoper;
        if (!OTleaf(op))
        {
            if (OTbinary(op))
                resetEcomsub(e.EV.E2);
            e = e.EV.E1;        // continue with the left operand without recursing
        }
        else
            break;
    }
}

/*********************************
 * Determine if elem e is a register variable.
 * Only OPvar and OPrelconst elems whose symbol has Sfl of FLreg or
 * FLpseudo can qualify.
 * If so:
 *      *pregm = mask of registers that make up the variable
 *      *preg = for FLreg, Sregmsw when e.EV.Voffset == REGSIZE and
 *              Sreglsw otherwise; for FLpseudo, the register number
 *              masked with 7
 *      returns true
 * Else
 *      returns false
 * Either of pregm and preg may be null, in which case that result is
 * not stored.
 * Side effect: refparam and reflocal are set to true when the symbol is
 * an FLreg SCparameter.
 */

int isregvar(elem *e,regm_t *pregm,reg_t *preg)
{
    Symbol *s;
    uint u;
    regm_t m;
    regm_t regm;
    reg_t reg;

    elem_debug(e);
    if (e.Eoper == OPvar || e.Eoper == OPrelconst)
    {
        s = e.EV.Vsym;
        switch (s.Sfl)
        {
            case FLreg:
                if (s.Sclass == SCparameter)
                {   refparam = true;
                    reflocal = true;
                }
                reg = e.EV.Voffset == REGSIZE ? s.Sregmsw : s.Sreglsw;
                regm = s.Sregm;
                //assert(tyreg(s.ty()));
                static if (0)
                {
                    // Let's just see if there is a CSE in a reg we can use
                    // instead. This helps avoid AGI's.
                    if (e.Ecount && e.Ecount != e.Ecomsub)
                    {   int i;

                        for (i = 0; i < arraysize(regcon.cse.value); i++)
                        {
                            if (regcon.cse.value[i] == e)
                            {   reg = i;
                                break;
                            }
                        }
                    }
                }
                // the variable's registers must all be register variables of this block
                assert(regm & regcon.mvar && !(regm & ~regcon.mvar));
                goto Lreg;

            case FLpseudo:
                version (MARS)
                {
                    u = s.Sreglsw;
                    m = mask(u);
                    if (m & ALLREGS && (u & ~3) != 4) // if not BP,SP,EBP,ESP,or ?H
                    {
                        reg = u & 7;
                        regm = m;
                        goto Lreg;
                    }
                }
                else
                {
                    u = s.Sreglsw;
                    m = pseudomask[u];
                    if (m & ALLREGS && (u & ~3) != 4) // if not BP,SP,EBP,ESP,or ?H
                    {
                        reg = pseudoreg[u] & 7;
                        regm = m;
                        goto Lreg;
                    }
                }
                break;

            default:
                break;
        }
    }
    return false;

Lreg:
    if (preg)
        *preg = reg;
    if (pregm)
        *pregm = regm;
    return true;
}

/*********************************
 * Allocate some registers.
 * Input:
 *      cdb             Code builder that receives any generated code.
 *      pretregs        Pointer to mask of registers to make selection from.
 *      tym             Mask of type we will store in registers.
 * Output:
 *      *pretregs       Mask of allocated registers.
 *      *preg           Register number of first allocated register.
 *      msavereg,mfuncreg       retregs bits are cleared.
 *      regcon.cse.mval,regcon.cse.mops updated
 *      cdb             Gets any code needed to save regcon.cse.mops on the
 *                      stack.
2019 */ 2020 2021 void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym) 2022 { 2023 allocreg(cdb, pretregs, preg, tym, __LINE__, __FILE__); 2024 } 2025 2026 void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym 2027 ,int line,const(char)* file) 2028 { 2029 reg_t reg; 2030 2031 static if (0) 2032 { 2033 if (pass == PASSfinal) 2034 { 2035 printf("allocreg %s,%d: regcon.mvar %s regcon.cse.mval %s msavereg %s *pretregs %s tym ", 2036 file,line,regm_str(regcon.mvar),regm_str(regcon.cse.mval), 2037 regm_str(msavereg),regm_str(*pretregs)); 2038 WRTYxx(tym); 2039 dbg_printf("\n"); 2040 } 2041 } 2042 tym = tybasic(tym); 2043 uint size = _tysize[tym]; 2044 *pretregs &= mES | allregs | XMMREGS; 2045 regm_t retregs = *pretregs; 2046 2047 debug if (retregs == 0) 2048 printf("allocreg: file %s(%d)\n", file, line); 2049 2050 if ((retregs & regcon.mvar) == retregs) // if exactly in reg vars 2051 { 2052 if (size <= REGSIZE || (retregs & XMMREGS)) 2053 { 2054 *preg = findreg(retregs); 2055 assert(retregs == mask(*preg)); /* no more bits are set */ 2056 } 2057 else if (size <= 2 * REGSIZE) 2058 { 2059 *preg = findregmsw(retregs); 2060 assert(retregs & mLSW); 2061 } 2062 else 2063 assert(0); 2064 getregs(cdb,retregs); 2065 return; 2066 } 2067 int count = 0; 2068 L1: 2069 //printf("L1: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs)); 2070 assert(++count < 20); /* fail instead of hanging if blocked */ 2071 assert(retregs); 2072 reg_t msreg = NOREG, lsreg = NOREG; /* no value assigned yet */ 2073 L3: 2074 //printf("L2: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs)); 2075 regm_t r = retregs & ~(msavereg | regcon.cse.mval | regcon.params); 2076 if (!r) 2077 { 2078 r = retregs & ~(msavereg | regcon.cse.mval); 2079 if (!r) 2080 { 2081 r = retregs & ~(msavereg | regcon.cse.mops); 2082 if (!r) 2083 { r = retregs & ~msavereg; 2084 if (!r) 2085 r = retregs; 2086 } 2087 } 2088 } 2089 2090 if (size <= REGSIZE 
|| retregs & XMMREGS) 2091 { 2092 if (r & ~mBP) 2093 r &= ~mBP; 2094 2095 // If only one index register, prefer to not use LSW registers 2096 if (!regcon.indexregs && r & ~mLSW) 2097 r &= ~mLSW; 2098 2099 if (pass == PASSfinal && r & ~lastretregs && !I16) 2100 { // Try not to always allocate the same register, 2101 // to schedule better 2102 2103 r &= ~lastretregs; 2104 if (r & ~last2retregs) 2105 { 2106 r &= ~last2retregs; 2107 if (r & ~last3retregs) 2108 { 2109 r &= ~last3retregs; 2110 if (r & ~last4retregs) 2111 { 2112 r &= ~last4retregs; 2113 // if (r & ~last5retregs) 2114 // r &= ~last5retregs; 2115 } 2116 } 2117 } 2118 if (r & ~mfuncreg) 2119 r &= ~mfuncreg; 2120 } 2121 reg = findreg(r); 2122 retregs = mask(reg); 2123 } 2124 else if (size <= 2 * REGSIZE) 2125 { 2126 /* Select pair with both regs free. Failing */ 2127 /* that, select pair with one reg free. */ 2128 2129 if (r & mBP) 2130 { 2131 retregs &= ~mBP; 2132 goto L3; 2133 } 2134 2135 if (r & mMSW) 2136 { 2137 if (r & mDX) 2138 msreg = DX; /* prefer to use DX over CX */ 2139 else 2140 msreg = findregmsw(r); 2141 r &= mLSW; /* see if there's an LSW also */ 2142 if (r) 2143 lsreg = findreg(r); 2144 else if (lsreg == NOREG) /* if don't have LSW yet */ 2145 { 2146 retregs &= mLSW; 2147 goto L3; 2148 } 2149 } 2150 else 2151 { 2152 if (I64 && !(r & mLSW)) 2153 { 2154 retregs = *pretregs & (mMSW | mLSW); 2155 assert(retregs); 2156 goto L1; 2157 } 2158 lsreg = findreglsw(r); 2159 if (msreg == NOREG) 2160 { 2161 retregs &= mMSW; 2162 assert(retregs); 2163 goto L3; 2164 } 2165 } 2166 reg = (msreg == ES) ? 
lsreg : msreg; 2167 retregs = mask(msreg) | mask(lsreg); 2168 } 2169 else if (I16 && (tym == TYdouble || tym == TYdouble_alias)) 2170 { 2171 debug 2172 if (retregs != DOUBLEREGS) 2173 printf("retregs = %s, *pretregs = %s\n", regm_str(retregs), regm_str(*pretregs)); 2174 2175 assert(retregs == DOUBLEREGS); 2176 reg = AX; 2177 } 2178 else 2179 { 2180 debug 2181 { 2182 WRTYxx(tym); 2183 printf("\nallocreg: fil %s lin %d, regcon.mvar %s msavereg %s *pretregs %s, reg %d, tym x%x\n", 2184 file,line,regm_str(regcon.mvar),regm_str(msavereg),regm_str(*pretregs),*preg,tym); 2185 } 2186 assert(0); 2187 } 2188 if (retregs & regcon.mvar) // if conflict with reg vars 2189 { 2190 if (!(size > REGSIZE && *pretregs == (mAX | mDX))) 2191 { 2192 retregs = (*pretregs &= ~(retregs & regcon.mvar)); 2193 goto L1; // try other registers 2194 } 2195 } 2196 *preg = reg; 2197 *pretregs = retregs; 2198 2199 //printf("Allocating %s\n",regm_str(retregs)); 2200 last5retregs = last4retregs; 2201 last4retregs = last3retregs; 2202 last3retregs = last2retregs; 2203 last2retregs = lastretregs; 2204 lastretregs = retregs; 2205 getregs(cdb, retregs); 2206 } 2207 2208 2209 /***************************************** 2210 * Allocate a scratch register. 2211 * Params: 2212 * cdb = where to write any generated code to 2213 * regm = mask of registers to pick one from 2214 * Returns: 2215 * selected register 2216 */ 2217 reg_t allocScratchReg(ref CodeBuilder cdb, regm_t regm) 2218 { 2219 reg_t r; 2220 allocreg(cdb, ®m, &r, TYoffset); 2221 return r; 2222 } 2223 2224 2225 /****************************** 2226 * Determine registers that should be destroyed upon arrival 2227 * to code entry point for exception handling. 2228 */ 2229 regm_t lpadregs() 2230 { 2231 regm_t used; 2232 if (config.ehmethod == EHmethod.EH_DWARF) 2233 used = allregs & ~mfuncreg; 2234 else 2235 used = (I32 | I64) ? 
allregs : (ALLREGS | mES); 2236 //printf("lpadregs(): used=%s, allregs=%s, mfuncreg=%s\n", regm_str(used), regm_str(allregs), regm_str(mfuncreg)); 2237 return used; 2238 } 2239 2240 2241 /************************* 2242 * Mark registers as used. 2243 */ 2244 2245 void useregs(regm_t regm) 2246 { 2247 //printf("useregs(x%x) %s\n", regm, regm_str(regm)); 2248 mfuncreg &= ~regm; 2249 regcon.used |= regm; // registers used in this block 2250 regcon.params &= ~regm; 2251 if (regm & regcon.mpvar) // if modified a fastpar register variable 2252 regcon.params = 0; // toss them all out 2253 } 2254 2255 /************************* 2256 * We are going to use the registers in mask r. 2257 * Generate any code necessary to save any regs. 2258 */ 2259 2260 void getregs(ref CodeBuilder cdb, regm_t r) 2261 { 2262 //printf("getregs(x%x) %s\n", r, regm_str(r)); 2263 regm_t ms = r & regcon.cse.mops; // mask of common subs we must save 2264 useregs(r); 2265 regcon.cse.mval &= ~r; 2266 msavereg &= ~r; // regs that are destroyed 2267 regcon.immed.mval &= ~r; 2268 if (ms) 2269 cse_save(cdb, ms); 2270 } 2271 2272 /************************* 2273 * We are going to use the registers in mask r. 2274 * Same as getregs(), but assert if code is needed to be generated. 2275 */ 2276 void getregsNoSave(regm_t r) 2277 { 2278 //printf("getregsNoSave(x%x) %s\n", r, regm_str(r)); 2279 assert(!(r & regcon.cse.mops)); // mask of common subs we must save 2280 useregs(r); 2281 regcon.cse.mval &= ~r; 2282 msavereg &= ~r; // regs that are destroyed 2283 regcon.immed.mval &= ~r; 2284 } 2285 2286 /***************************************** 2287 * Copy registers in cse.mops into memory. 
 * Params:
 *      cdb = where to write any generated store code to
 *      ms = mask of registers to save; must be a subset of regcon.cse.mops.
 *           Those bits are cleared from regcon.cse.mops on entry.
 */

private void cse_save(ref CodeBuilder cdb, regm_t ms)
{
    assert((ms & regcon.cse.mops) == ms);
    regcon.cse.mops &= ~ms;

    /* Skip CSEs that are already saved */
    for (regm_t regm = 1; regm < mask(NUMREGS); regm <<= 1)
    {
        if (regm & ms)
        {
            const e = regcon.cse.value[findreg(regm)];
            const sz = tysize(e.Ety);
            foreach (const ref cse; CSE.filter(e))
            {
                /* An existing entry for e counts as covering this register
                 * when e fits in one register, or (two-register values) the
                 * entry is for the same half, or (4 * REGSIZE values) the
                 * entry is for exactly this register.
                 */
                if (sz <= REGSIZE ||
                    sz <= 2 * REGSIZE &&
                        (regm & mMSW && cse.regm & mMSW ||
                         regm & mLSW && cse.regm & mLSW) ||
                    sz == 4 * REGSIZE && regm == cse.regm
                   )
                {
                    ms &= ~regm;
                    if (!ms)
                        return;         // nothing left to save
                    break;
                }
            }
        }
    }

    // Create one CSE entry per remaining register, lowest register first
    while (ms)
    {
        auto cse = CSE.add();
        reg_t reg = findreg(ms);          /* the register to save         */
        cse.e = regcon.cse.value[reg];
        cse.regm = mask(reg);

        ms &= ~mask(reg);           /* turn off reg bit in ms       */

        // If we can simply reload the CSE, we don't need to save it
        if (cse_simple(&cse.csimple, cse.e))
            cse.flags |= CSEsimple;
        else
        {
            CSE.updateSizeAndAlign(cse.e);
            gen_storecse(cdb, cse.e.Ety, reg, cse.slot);
            reflocal = true;
        }
    }
}

/******************************************
 * Getregs without marking immediate register values as gone.
 * regcon.immed.mval is saved before the getregs() call and restored after it.
 * Params:
 *      cdb = where to write any generated code to
 *      r = mask of registers about to be used
 */

void getregs_imm(ref CodeBuilder cdb, regm_t r)
{
    regm_t save = regcon.immed.mval;
    getregs(cdb,r);
    regcon.immed.mval = save;
}

/******************************************
 * Flush all CSE's out of registers and into memory.
* Input:
 *      do87    !=0 means save 87 registers too
 */

void cse_flush(ref CodeBuilder cdb, int do87)
{
    //dbg_printf("cse_flush()\n");
    cse_save(cdb,regcon.cse.mops);      // save any CSEs to memory
    if (do87)
        save87(cdb);    // save any 8087 temporaries
}

/*************************
 * Common subexpressions exist in registers. Note this in regcon.cse.mval.
 * Input:
 *      e       the subexpression
 *      regm    mask of registers holding it
 *      opsflag if != 0 then regcon.cse.mops gets set too
 * Returns:
 *      false   not saved as a CSE
 *      true    saved as a CSE
 */

bool cssave(elem *e,regm_t regm,uint opsflag)
{
    bool result = false;

    /*if (e.Ecount && e.Ecount == e.Ecomsub)*/
    if (e.Ecount && e.Ecomsub)
    {
        // Outside the final pass, 32/64 bit targets only record CSEs
        // when opsflag is set
        if (!opsflag && pass != PASSfinal && (I32 || I64))
            return false;

        //printf("cssave(e = %p, regm = %s, opsflag = x%x)\n", e, regm_str(regm), opsflag);
        regm &= mBP | ALLREGS | mES | XMMREGS;    /* just to be sure              */

        /+
        /* Do not register CSEs if they are register variables and      */
        /* are not operator nodes. This forces the register allocation  */
        /* to go through allocreg(), which will prevent using register  */
        /* variables for scratch.                                       */
        if (opsflag || !(regm & regcon.mvar))
        +/
        // Walk the set bits of regm from the lowest register upwards
        for (uint i = 0; regm; i++)
        {
            regm_t mi = mask(i);
            if (regm & mi)
            {
                regm &= ~mi;

                // If we don't need this CSE, and the register already
                // holds a CSE that we do need, don't mark the new one
                if (regcon.cse.mval & mi && regcon.cse.value[i] != e &&
                    !opsflag && regcon.cse.mops & mi)
                    continue;

                regcon.cse.mval |= mi;
                if (opsflag)
                    regcon.cse.mops |= mi;
                //printf("cssave set: regcon.cse.value[%s] = %p\n",regstring[i],e);
                regcon.cse.value[i] = e;
                result = true;
            }
        }
    }
    return result;
}

/*************************************
 * Determine if a computation should be done into a register.
 * Params:
 *      e = the elem to examine
 * Returns:
 *      true if e should be evaluated into a register
 */

bool evalinregister(elem *e)
{
    if (config.exe == EX_WIN64 && e.Eoper == OPrelconst)
        return true;

    if (e.Ecount == 0)              /* elem is not a CSE, therefore */
                                    /* we don't need to evaluate it */
                                    /* in a register                */
        return false;
    if (!OTleaf(e.Eoper))           /* operators are always in register */
        return true;

    // Need to rethink this code if float or double can be CSE'd
    uint sz = tysize(e.Ety);
    if (e.Ecount == e.Ecomsub)      /* elem is a CSE that needs     */
                                    /* to be generated              */
    {
        if ((I32 || I64) &&
            //pass == PASSfinal && // bug 8987
            sz <= REGSIZE)
        {
            // Do it only if at least 2 registers are available
            regm_t m = allregs & ~regcon.mvar;
            if (sz == 1)
                m &= BYTEREGS;
            if (m & (m - 1))        // if more than one register
            {   // Need to be at least 3 registers available, as
                // addressing modes can use up 2.
                while (!(m & 1))    // shift the lowest set bit down to bit 0
                    m >>= 1;
                m >>= 1;            // discard it
                if (m & (m - 1))    // at least two more bits remain
                    return true;
            }
        }
        return false;
    }

    /* Elem is now a CSE that might have been generated. If so, and */
    /* it's in a register already, the computation should be done   */
    /* using that register.                                         */
    regm_t emask = 0;
    for (uint i = 0; i < regcon.cse.value.length; i++)
        if (regcon.cse.value[i] == e)
            emask |= mask(i);
    emask &= regcon.cse.mval;       // mask of available CSEs
    if (sz <= REGSIZE)
        return emask != 0;          /* the CSE is in a register     */
    else if (sz <= 2 * REGSIZE)
        return (emask & mMSW) && (emask & mLSW);
    return true;                    /* cop-out for now              */
}

/*******************************************************
 * Return mask of scratch registers.
 * Returns:
 *      0 unless pass == PASSfinal; otherwise the registers in allregs
 *      that are not register variables, CSE holders, immediate value
 *      holders, parameter registers, nor still set in mfuncreg
 */

regm_t getscratch()
{
    regm_t scratch = 0;
    if (pass == PASSfinal)
    {
        scratch = allregs & ~(regcon.mvar | regcon.mpvar | regcon.cse.mval |
                regcon.immed.mval | regcon.params | mfuncreg);
    }
    return scratch;
}

/******************************
 * Evaluate an elem that is a common subexp that has been encountered
 * before.
 * Look first to see if it is already in a register.
 * Params:
 *      cdb      = code builder that receives any generated code
 *      e        = the common subexpression
 *      pretregs = on entry the mask of acceptable result registers;
 *                 if it is 0 nothing is generated
 */

private void comsub(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    tym_t tym;
    regm_t regm,emask;
    reg_t reg;
    uint byte_,sz;

    //printf("comsub(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem_debug(e);

    debug
    {
        if (e.Ecomsub > e.Ecount)
            elem_print(e);
    }

    assert(e.Ecomsub <= e.Ecount);

    if (*pretregs == 0)        // no possible side effects anyway
    {
        return;
    }

    /* First construct a mask, emask, of all the registers that
     * have the right contents.
     */
    emask = 0;
    for (uint i = 0; i < regcon.cse.value.length; i++)
    {
        //dbg_printf("regcon.cse.value[%d] = %p\n",i,regcon.cse.value[i]);
        if (regcon.cse.value[i] == e)   // if contents are right
            emask |= mask(i);           // turn on bit for reg
    }
    emask &= regcon.cse.mval;           // make sure all bits are valid

    if (emask & XMMREGS && *pretregs == mPSW)
        { }
    else if (tyxmmreg(e.Ety) && config.fpxmmregs)
    {
        if (*pretregs & (mST0 | mST01))
        {
            // Fetch the value into XMM registers first, then let
            // fixresult() move it to where the caller wants it
            regm_t retregs = *pretregs & mST0 ? XMMREGS : mXMM0 | mXMM1;
            comsub(cdb, e, &retregs);
            fixresult(cdb,e,retregs,pretregs);
            return;
        }
    }
    else if (tyfloating(e.Ety) && config.inline8087)
    {
        comsub87(cdb,e,pretregs);
        return;
    }


    /* create mask of CSEs */
    regm_t csemask = CSE.mask(e);
    csemask &= ~emask;            // stuff already in registers

    debug if (debugw)
    {
        printf("comsub(e=%p): *pretregs=%s, emask=%s, csemask=%s, regcon.cse.mval=%s, regcon.mvar=%s\n",
                e,regm_str(*pretregs),regm_str(emask),regm_str(csemask),
                regm_str(regcon.cse.mval),regm_str(regcon.mvar));
        if (regcon.cse.mval & 1)
            elem_print(regcon.cse.value[0]);
    }

    tym = tybasic(e.Ety);
    sz = _tysize[tym];
    byte_ = sz == 1;

    if (sz <= REGSIZE || (tyxmmreg(tym) && config.fpxmmregs)) // if data will fit in one register
    {
        /* First see if it is already in a correct register     */

        regm = emask & *pretregs;
        if (regm == 0)
            regm = emask;               /* try any other register       */
        if (regm)                       /* if it's in a register        */
        {
            if (!OTleaf(e.Eoper) || !(regm & regcon.mvar) || (*pretregs & regcon.mvar) == *pretregs)
            {
                regm = mask(findreg(regm));
                fixresult(cdb,e,regm,pretregs);
                return;
            }
        }

        if (OTleaf(e.Eoper))            /* if not op or func            */
            goto reload;                /* reload data                  */

        // Only the first saved entry for e is used: every path in the
        // loop body ends at the `return` below
        foreach (ref cse; CSE.filter(e))
        {
            regm_t retregs;

            if (cse.flags & CSEsimple)
            {
                retregs = *pretregs;
                if (byte_ && !(retregs & BYTEREGS))
                    retregs = BYTEREGS;
                else if (!(retregs & allregs))
                    retregs = allregs;
                allocreg(cdb,&retregs,&reg,tym);
                // Re-issue the remembered instruction, retargeted at reg
                code *cr = &cse.csimple;
                cr.setReg(reg);
                if (I64 && reg >= 4 && tysize(cse.e.Ety) == 1)
                    cr.Irex |= REX;
                cdb.gen(cr);
                goto L10;
            }
            else
            {
                reflocal = true;
                cse.flags |= CSEload;
                if (*pretregs == mPSW)  // if result in CCs only
                {
                    if (config.fpxmmregs && (tyxmmreg(cse.e.Ety) || tyvector(cse.e.Ety)))
                    {
                        retregs = XMMREGS;
                        allocreg(cdb,&retregs,&reg,tym);
                        gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
                        regcon.cse.mval |= mask(reg); // cs is in a reg
                        regcon.cse.value[reg] = e;
                        fixresult(cdb,e,retregs,pretregs);
                    }
                    else
                    {
                        // CMP cs[BP],0
                        gen_testcse(cdb, cse.e.Ety, sz, cse.slot);
                    }
                }
                else
                {
                    retregs = *pretregs;
                    if (byte_ && !(retregs & BYTEREGS))
                        retregs = BYTEREGS;
                    allocreg(cdb,&retregs,&reg,tym);
                    gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
                L10:
                    regcon.cse.mval |= mask(reg); // cs is in a reg
                    regcon.cse.value[reg] = e;
                    fixresult(cdb,e,retregs,pretregs);
                }
            }
            return;
        }

        debug
        {
            printf("couldn't find cse e = %p, pass = %d\n",e,pass);
            elem_print(e);
        }
        assert(0);                      /* should have found it         */
    }
    else                                /* reg pair is req'd            */
    if (sz <= 2 * REGSIZE)
    {
        reg_t msreg,lsreg;

        /* see if we have both  */
        if (!((emask | csemask) & mMSW && (emask | csemask) & (mLSW | mBP)))
        {
            /* we don't have both   */
            debug if (!OTleaf(e.Eoper))
            {
                printf("e = %p, op = x%x, emask = %s, csemask = %s\n",
                    e,e.Eoper,regm_str(emask),regm_str(csemask));
                //printf("mMSW = x%x, mLSW = x%x\n", mMSW, mLSW);
                elem_print(e);
            }

            assert(OTleaf(e.Eoper));    /* must have both for operators */
            goto reload;
        }

        /* Look for right vals in any regs      */
        regm = *pretregs & mMSW;
        if (emask & regm)
            msreg = findreg(emask & regm);
        else if (emask & mMSW)
            msreg = findregmsw(emask);
        else                            /* reload from cse array        */
        {
            if (!regm)
                regm = mMSW & ALLREGS;
            allocreg(cdb,&regm,&msreg,TYint);
            loadcse(cdb,e,msreg,mMSW);
        }

        regm = *pretregs & (mLSW | mBP);
        if (emask & regm)
            lsreg = findreg(emask & regm);
        else if (emask & (mLSW | mBP))
            lsreg = findreglsw(emask);
        else
        {
            if (!regm)
                regm = mLSW;
            allocreg(cdb,&regm,&lsreg,TYint);
            loadcse(cdb,e,lsreg,mLSW | mBP);
        }

        regm = mask(msreg) | mask(lsreg);       /* mask of result       */
        fixresult(cdb,e,regm,pretregs);
        return;
    }
    else if (tym == TYdouble || tym == TYdouble_alias)    // double
    {
        assert(I16);
        if (((csemask | emask) & DOUBLEREGS_16) == DOUBLEREGS_16)
        {
            // dblreg[r] is the register visited after r; NOREG ends the walk
            static const reg_t[4] dblreg = [ BX,DX,NOREG,CX ]; // duplicate of one in cod4.d
            for (reg = 0; reg != NOREG; reg = dblreg[reg])
            {
                assert(cast(int) reg >= 0 && reg <= 7);
                if (mask(reg) & csemask)
                    loadcse(cdb,e,reg,mask(reg));
            }
            regm = DOUBLEREGS_16;
            fixresult(cdb,e,regm,pretregs);
            return;
        }
        if (OTleaf(e.Eoper)) goto reload;

        debug
        printf("e = %p, csemask = %s, emask = %s\n",e,regm_str(csemask),regm_str(emask));

        assert(0);
    }
    else
    {
        debug
        printf("e = %p, tym = x%x\n",e,tym);

        assert(0);
    }

reload:                                 /* reload result from memory    */
    switch (e.Eoper)
    {
        case OPrelconst:
            cdrelconst(cdb,e,pretregs);
            break;

        case OPgot:
            if (config.exe & EX_posix)
            {
                cdgot(cdb,e,pretregs);
                break;
            }
            goto default;

        default:
            if (*pretregs == mPSW &&
                config.fpxmmregs &&
                (tyxmmreg(tym) || tysimd(tym)))
            {
                regm_t retregs = XMMREGS | mPSW;
                loaddata(cdb,e,&retregs);
                cssave(e,retregs,false);
                return;
            }
            loaddata(cdb,e,pretregs);
            break;
    }
    cssave(e,*pretregs,false);
}


/*****************************
 * Load reg from cse save area on stack.
 * Params:
 *      cdb  = code builder that receives the load
 *      e    = the common subexpression to load
 *      reg  = register to load into
 *      regm = the first saved entry for e whose register mask
 *             intersects regm is the one loaded
 * It is an internal error (assert) if no such entry exists.
 */

private void loadcse(ref CodeBuilder cdb,elem *e,reg_t reg,regm_t regm)
{
    foreach (ref cse; CSE.filter(e))
    {
        //printf("CSE[%d] = %p, regm = %s\n", i, cse.e, regm_str(cse.regm));
        if (cse.regm & regm)
        {
            reflocal = true;
            cse.flags |= CSEload;    /* it was loaded        */
            regcon.cse.value[reg] = e;
            regcon.cse.mval |= mask(reg);
            getregs(cdb,mask(reg));
            gen_loadcse(cdb, cse.e.Ety, reg, cse.slot);
            return;
        }
    }
    debug
    {
        printf("loadcse(e = %p, reg = %d, regm = %s)\n",e,reg,regm_str(regm));
        elem_print(e);
    }
    assert(0);
}

/***************************
 * Generate code sequence for an elem.
 * Input:
 *      pretregs =      mask of possible registers to return result in
 *                      Note:   longs are in AX,BX or CX,DX or SI,DI
 *                              doubles are AX,BX,CX,DX only
 *      constflag =     1 for user of result will not modify the
 *                      registers returned in *pretregs.
 *                      2 for freenode() not called.
* Output:
 *      *pretregs       mask of registers result is returned in
 *      the generated code is added to the CodeBuilder passed in
 */

/* Dispatch directly to the code generator registered for operator op,
 * bypassing the CSE bookkeeping done by codelem().
 */
void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op)
{
    assert(op < OPMAX);         // cdxxx has exactly OPMAX entries
    (*cdxxx[op])(cdb,e,pretregs);
}

// jump table
private extern (C++) __gshared nothrow void function (ref CodeBuilder,elem *,regm_t *)[OPMAX] cdxxx =
[
    OPunde:    &cderr,
    OPadd:     &cdorth,
    OPmul:     &cdmul,
    OPand:     &cdorth,
    OPmin:     &cdorth,
    OPnot:     &cdnot,
    OPcom:     &cdcom,
    OPcond:    &cdcond,
    OPcomma:   &cdcomma,
    OPremquo:  &cddiv,
    OPdiv:     &cddiv,
    OPmod:     &cddiv,
    OPxor:     &cdorth,
    OPstring:  &cderr,
    OPrelconst: &cdrelconst,
    OPinp:     &cdport,
    OPoutp:    &cdport,
    OPasm:     &cdasm,
    OPinfo:    &cdinfo,
    OPdctor:   &cddctor,
    OPddtor:   &cdddtor,
    OPctor:    &cdctor,
    OPdtor:    &cddtor,
    OPmark:    &cdmark,
    OPvoid:    &cdvoid,
    OPhalt:    &cdhalt,
    OPnullptr: &cderr,
    OPpair:    &cdpair,
    OPrpair:   &cdpair,

    OPor:      &cdorth,
    OPoror:    &cdloglog,
    OPandand:  &cdloglog,
    OProl:     &cdshift,
    OPror:     &cdshift,
    OPshl:     &cdshift,
    OPshr:     &cdshift,
    OPashr:    &cdshift,
    OPbit:     &cderr,
    OPind:     &cdind,
    OPaddr:    &cderr,
    OPneg:     &cdneg,
    OPuadd:    &cderr,
    OPabs:     &cdabs,
    OPtoprec:  &cdtoprec,
    OPsqrt:    &cdneg,
    OPsin:     &cdneg,
    OPcos:     &cdneg,
    OPscale:   &cdscale,
    OPyl2x:    &cdscale,
    OPyl2xp1:  &cdscale,
    OPcmpxchg: &cdcmpxchg,
    OPrint:    &cdneg,
    OPrndtol:  &cdrndtol,
    OPstrlen:  &cdstrlen,
    OPstrcpy:  &cdstrcpy,
    OPmemcpy:  &cdmemcpy,
    OPmemset:  &cdmemset,
    OPstrcat:  &cderr,
    OPstrcmp:  &cdstrcmp,
    OPmemcmp:  &cdmemcmp,
    OPsetjmp:  &cdsetjmp,
    OPnegass:  &cdaddass,
    OPpreinc:  &cderr,
    OPpredec:  &cderr,
    OPstreq:   &cdstreq,
    OPpostinc: &cdpost,
    OPpostdec: &cdpost,
    OPeq:      &cdeq,
    OPaddass:  &cdaddass,
    OPminass:  &cdaddass,
    OPmulass:  &cdmulass,
    OPdivass:  &cddivass,
    OPmodass:  &cddivass,
    OPshrass:  &cdshass,
    OPashrass: &cdshass,
    OPshlass:  &cdshass,
    OPandass:  &cdaddass,
    OPxorass:  &cdaddass,
    OPorass:   &cdaddass,

    OPle:      &cdcmp,
    OPgt:      &cdcmp,
    OPlt:      &cdcmp,
    OPge:      &cdcmp,
    OPeqeq:    &cdcmp,
    OPne:      &cdcmp,

    OPunord:   &cdcmp,
    OPlg:      &cdcmp,
    OPleg:     &cdcmp,
    OPule:     &cdcmp,
    OPul:      &cdcmp,
    OPuge:     &cdcmp,
    OPug:      &cdcmp,
    OPue:      &cdcmp,
    OPngt:     &cdcmp,
    OPnge:     &cdcmp,
    OPnlt:     &cdcmp,
    OPnle:     &cdcmp,
    OPord:     &cdcmp,
    OPnlg:     &cdcmp,
    OPnleg:    &cdcmp,
    OPnule:    &cdcmp,
    OPnul:     &cdcmp,
    OPnuge:    &cdcmp,
    OPnug:     &cdcmp,
    OPnue:     &cdcmp,

    OPvp_fp:   &cdcnvt,
    OPcvp_fp:  &cdcnvt,
    OPoffset:  &cdlngsht,
    OPnp_fp:   &cdshtlng,
    OPnp_f16p: &cdfar16,
    OPf16p_np: &cdfar16,

    OPs16_32:  &cdshtlng,
    OPu16_32:  &cdshtlng,
    OPd_s32:   &cdcnvt,
    OPb_8:     &cdcnvt,
    OPs32_d:   &cdcnvt,
    OPd_s16:   &cdcnvt,
    OPs16_d:   &cdcnvt,
    OPd_u16:   &cdcnvt,
    OPu16_d:   &cdcnvt,
    OPd_u32:   &cdcnvt,
    OPu32_d:   &cdcnvt,
    OP32_16:   &cdlngsht,
    OPd_f:     &cdcnvt,
    OPf_d:     &cdcnvt,
    OPd_ld:    &cdcnvt,
    OPld_d:    &cdcnvt,
    OPc_r:     &cdconvt87,
    OPc_i:     &cdconvt87,
    OPu8_16:   &cdbyteint,
    OPs8_16:   &cdbyteint,
    OP16_8:    &cdlngsht,
    OPu32_64:  &cdshtlng,
    OPs32_64:  &cdshtlng,
    OP64_32:   &cdlngsht,
    OPu64_128: &cdshtlng,
    OPs64_128: &cdshtlng,
    OP128_64:  &cdlngsht,
    OPmsw:     &cdmsw,

    OPd_s64:   &cdcnvt,
    OPs64_d:   &cdcnvt,
    OPd_u64:   &cdcnvt,
    OPu64_d:   &cdcnvt,
    OPld_u64:  &cdcnvt,
    OPparam:   &cderr,
    OPsizeof:  &cderr,
    OParrow:   &cderr,
    OParrowstar: &cderr,
    OPcolon:   &cderr,
    OPcolon2:  &cderr,
    OPbool:    &cdnot,
    OPcall:    &cdfunc,
    OPucall:   &cdfunc,
    OPcallns:  &cdfunc,
    OPucallns: &cdfunc,
    OPstrpar:  &cderr,
    OPstrctor: &cderr,
    OPstrthis: &cdstrthis,
    OPconst:   &cderr,
    OPvar:     &cderr,
    OPnew:     &cderr,
    OPanew:    &cderr,
    OPdelete:  &cderr,
    OPadelete: &cderr,
    OPbrack:   &cderr,
    OPframeptr: &cdframeptr,
    OPgot:     &cdgot,

    OPbsf:     &cdbscan,
    OPbsr:     &cdbscan,
    OPbtst:    &cdbtst,
    OPbt:      &cdbt,
    OPbtc:     &cdbt,
    OPbtr:     &cdbt,
    OPbts:     &cdbt,

    OPbswap:   &cdbswap,
    OPpopcnt:  &cdpopcnt,
    OPvector:  &cdvector,
    OPvecsto:  &cdvecsto,
    OPvecfill: &cdvecfill,
    OPva_start: &cderr,
    OPprefetch: &cdprefetch,
];


/***************************
 * Generate code for elem e.
 * Params:
 *      cdb       = code builder that receives the generated code
 *      e         = the elem; must not be null
 *      pretregs  = in: mask of possible registers to return the result in;
 *                  out: mask of registers the result is returned in
 *      constflag = bit 0 set: the user of the result will not modify the
 *                  registers returned in *pretregs;
 *                  bit 1 set: freenode(e) is not called
 */
void codelem(ref CodeBuilder cdb,elem *e,regm_t *pretregs,uint constflag)
{
    Symbol *s;

    assert(e);          // checked before the debug output dereferences e

    debug if (debugw)
    {
        printf("+codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs));
        WROP(e.Eoper);
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
        printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
    }

    elem_debug(e);
    // Invariant: every register flagged in mops must also be valid in mval
    if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops)
    {
        debug
        {
            printf("+codelem(e=%p,*pretregs=%s) ", e, regm_str(*pretregs));
            elem_print(e);
            printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                    regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
            printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub);
        }
        assert(0);
    }

    if (!(constflag & 1) && *pretregs & (mES | ALLREGS | mBP | XMMREGS) & ~regcon.mvar)
        *pretregs &= ~regcon.mvar;                      /* can't use register vars */

    uint op = e.Eoper;
    if (e.Ecount && e.Ecount != e.Ecomsub)     // if common subexp
    {
        comsub(cdb,e,pretregs);
        goto L1;
    }

    if (configv.addlinenumbers && e.Esrcpos.Slinnum)
        cdb.genlinnum(e.Esrcpos);

    switch (op)
    {
        default:
            // Valid operators index cdxxx[0 .. OPMAX]; check before either
            // dispatch below uses op as an index
            assert(op < OPMAX);
            if (e.Ecount)                          /* if common subexp     */
            {
                /* if no return value       */
                if ((*pretregs & (mSTACK | mES | ALLREGS | mBP | XMMREGS)) == 0)
                {
                    if (*pretregs & (mST0 | mST01))
                    {
                        //printf("generate ST0 comsub for:\n");
                        //elem_print(e);

                        regm_t retregs = *pretregs & mST0 ? mXMM0 : mXMM0|mXMM1;
                        (*cdxxx[op])(cdb,e,&retregs);
                        cssave(e,retregs,!OTleaf(op));
                        fixresult(cdb, e, retregs, pretregs);
                        goto L1;
                    }
                    if (tysize(e.Ety) == 1)
                        *pretregs |= BYTEREGS;
                    else if ((tyxmmreg(e.Ety) || tysimd(e.Ety)) && config.fpxmmregs)
                        *pretregs |= XMMREGS;
                    else if (tybasic(e.Ety) == TYdouble || tybasic(e.Ety) == TYdouble_alias)
                        *pretregs |= DOUBLEREGS;
                    else
                        *pretregs |= ALLREGS;                       /* make one             */
                }

                /* BUG: For CSEs, make sure we have both an MSW             */
                /* and an LSW specified in *pretregs                        */
            }
            (*cdxxx[op])(cdb,e,pretregs);
            break;

        case OPrelconst:
            cdrelconst(cdb,e,pretregs);
            break;

        case OPvar:
            if (constflag & 1 && (s = e.EV.Vsym).Sfl == FLreg &&
                (s.Sregm & *pretregs) == s.Sregm)
            {
                if (tysize(e.Ety) <= REGSIZE && tysize(s.Stype.Tty) == 2 * REGSIZE)
                    *pretregs &= mPSW | (s.Sregm & mLSW);
                else
                    *pretregs &= mPSW | s.Sregm;
            }
            goto case OPconst;

        case OPconst:
            if (*pretregs == 0 && (e.Ecount >= 3 || e.Ety & mTYvolatile))
            {
                switch (tybasic(e.Ety))
                {
                    case TYbool:
                    case TYchar:
                    case TYschar:
                    case TYuchar:
                        *pretregs |= BYTEREGS;
                        break;

                    case TYnref:
                    case TYnptr:
                    case TYsptr:
                    case TYcptr:
                    case TYfgPtr:
                    case TYimmutPtr:
                    case TYsharePtr:
                    case TYrestrictPtr:
                        *pretregs |= I16 ? IDXREGS : ALLREGS;
                        break;

                    case TYshort:
                    case TYushort:
                    case TYint:
                    case TYuint:
                    case TYlong:
                    case TYulong:
                    case TYllong:
                    case TYullong:
                    case TYcent:
                    case TYucent:
                    case TYfptr:
                    case TYhptr:
                    case TYvptr:
                        *pretregs |= ALLREGS;
                        break;

                    default:
                        break;
                }
            }
            loaddata(cdb,e,pretregs);
            break;
    }
    cssave(e,*pretregs,!OTleaf(op));
L1:
    if (!(constflag & 2))
        freenode(e);

    debug if (debugw)
    {
        printf("-codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs));
        WROP(op);
        printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n",
                regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops));
    }
}

/*******************************
 * Same as codelem(), but do not destroy the registers in keepmsk.
 * Use scratch registers as much as possible, then use stack.
 * Input:
 *      constflag       true if user of result will not modify the
 *                      registers returned in *pretregs.
*/

void scodelem(ref CodeBuilder cdb, elem *e,regm_t *pretregs,regm_t keepmsk,bool constflag)
{
    regm_t touse;

    debug if (debugw)
        printf("+scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
            e,regm_str(*pretregs),regm_str(keepmsk),constflag);

    elem_debug(e);
    if (constflag)
    {
        regm_t regm;
        reg_t reg;

        // Fast path: the value already sits in an acceptable register
        // variable, so no code for e itself needs to be generated
        if (isregvar(e,&regm,&reg) &&           // if e is a register variable
            (regm & *pretregs) == regm &&       // in one of the right regs
            e.EV.Voffset == 0
           )
        {
            uint sz1 = tysize(e.Ety);
            uint sz2 = tysize(e.EV.Vsym.Stype.Tty);
            if (sz1 <= REGSIZE && sz2 > REGSIZE)
                regm &= mLSW | XMMREGS;
            fixresult(cdb,e,regm,pretregs);
            cssave(e,regm,0);
            freenode(e);

            debug if (debugw)
                printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
                    e,regm_str(*pretregs),regm_str(keepmsk),constflag);

            return;
        }
    }
    regm_t overlap = msavereg & keepmsk;
    msavereg |= keepmsk;          /* add to mask of regs to save          */
    regm_t oldregcon = regcon.cse.mval;
    regm_t oldregimmed = regcon.immed.mval;
    regm_t oldmfuncreg = mfuncreg;       /* remember old one                     */
    // Start from a fresh mfuncreg so that, after codelem(), the bits that
    // were cleared show which registers were changed
    mfuncreg = (XMMREGS | mBP | mES | ALLREGS) & ~regcon.mvar;
    uint stackpushsave = stackpush;
    char calledafuncsave = calledafunc;
    calledafunc = 0;
    CodeBuilder cdbx; cdbx.ctor();
    codelem(cdbx,e,pretregs,constflag);    // generate code for the elem

    /* Registers to save: those in keepmsk that are no longer set in
     * mfuncreg. This must not be tested against msavereg, which had
     * keepmsk OR'ed into it above and would always yield 0.
     */
    regm_t tosave = keepmsk & ~mfuncreg;
    if (tosave)
    {
        cgstate.stackclean++;
        genstackclean(cdbx,stackpush - stackpushsave,*pretregs | msavereg);
        cgstate.stackclean--;
    }

    /* Assert that no new CSEs are generated that are not reflected       */
    /* in mfuncreg.                                                       */
    debug if ((mfuncreg & (regcon.cse.mval & ~oldregcon)) != 0)
        printf("mfuncreg %s, regcon.cse.mval %s, oldregcon %s, regcon.mvar %s\n",
                regm_str(mfuncreg),regm_str(regcon.cse.mval),regm_str(oldregcon),regm_str(regcon.mvar));

    assert((mfuncreg & (regcon.cse.mval & ~oldregcon)) == 0);

    /* bugzilla 3521
     * The problem is:
     *    reg op (reg = exp)
     * where reg must be preserved (in keepregs) while the expression to be evaluated
     * must change it.
     * The only solution is to make this variable not a register.
     */
    if (regcon.mvar & tosave)
    {
        //elem_print(e);
        //printf("test1: regcon.mvar %s tosave %s\n", regm_str(regcon.mvar), regm_str(tosave));
        cgreg_unregister(regcon.mvar & tosave);
    }

    /* which registers can we use to save other registers in? */
    if (config.flags4 & CFG4space ||              // if optimize for space
        config.target_cpu >= TARGET_80486)        // PUSH/POP ops are 1 cycle
        touse = 0;                              // PUSH/POP pairs are always shorter
    else
    {
        touse = mfuncreg & allregs & ~(msavereg | oldregcon | regcon.cse.mval);
        /* Don't use registers we'll have to save/restore               */
        touse &= ~(fregsaved & oldmfuncreg);
        /* Don't use registers that have constant values in them, since
           the code generated might have used the value.
         */
        touse &= ~oldregimmed;
    }

    CodeBuilder cdbs1; cdbs1.ctor();    // save code, placed before cdbx
    code *cs2 = null;                   // restore code, built in reverse order
    int adjesp = 0;                     // bytes of stack used by the saves

    for (uint i = 0; tosave; i++)
    {
        regm_t mi = mask(i);

        assert(i < REGMAX);
        if (mi & tosave)        /* i = register to save                 */
        {
            if (touse)          /* if any scratch registers             */
            {
                uint j;
                for (j = 0; j < 8; j++)
                {
                    regm_t mj = mask(j);

                    if (touse & mj)
                    {
                        genmovreg(cdbs1,j,i);
                        cs2 = cat(genmovreg(i,j),cs2);
                        touse &= ~mj;
                        mfuncreg &= ~mj;
                        regcon.used |= mj;
                        break;
                    }
                }
                assert(j < 8);
            }
            else                        // else use memory
            {
                CodeBuilder cdby; cdby.ctor();
                uint size = gensaverestore(mask(i), cdbs1, cdby);
                cs2 = cat(cdby.finish(),cs2);
                if (size)
                {
                    stackchanged = 1;
                    adjesp += size;
                }
            }
            getregs(cdbx,mi);
            tosave &= ~mi;
        }
    }
    CodeBuilder cdbs2; cdbs2.ctor();    // restore code, placed after cdbx
    if (adjesp)
    {
        // If this is done an odd number of times, it
        // will throw off the 8 byte stack alignment.
        // We should *only* worry about this if a function
        // was called in the code generation by codelem().
        int sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1);
        if (calledafunc && !I16 && sz && (STACKALIGN >= 16 || config.flags4 & CFG4stackalign))
        {
            regm_t mval_save = regcon.immed.mval;
            regcon.immed.mval = 0;      // prevent reghasvalue() optimizations
                                        // because c hasn't been executed yet
            cod3_stackadj(cdbs1, sz);
            regcon.immed.mval = mval_save;
            cdbs1.genadjesp(sz);

            cod3_stackadj(cdbs2, -sz);
            cdbs2.genadjesp(-sz);
        }
        cdbs2.append(cs2);


        cdbs1.genadjesp(adjesp);
        cdbs2.genadjesp(-adjesp);
    }
    else
        cdbs2.append(cs2);

    calledafunc |= calledafuncsave;
    msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save   */
    mfuncreg &= oldmfuncreg;        /* update original                    */

    debug if (debugw)
        printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n",
            e,regm_str(*pretregs),regm_str(keepmsk),constflag);

    // Final order: save registers, evaluate e, restore registers
    cdb.append(cdbs1);
    cdb.append(cdbx);
    cdb.append(cdbs2);
    return;
}

/*********************************************
 * Turn register mask into a string suitable for printing.
*/

/* Params:
 *      rm = the register mask
 * Returns:
 *      a string literal for 0 and for the well known masks; otherwise a
 *      strdup()'d string of register names separated by '|'.
 *      NOTE(review): the strdup()'d copy is not freed anywhere in this
 *      function - confirm whether callers are expected to release it.
 */
const(char)* regm_str(regm_t rm)
{
    enum NUM = 10;
    enum SMAX = 128;
    __gshared char[SMAX + 1][NUM] str;  // scratch buffers, used in rotation
    __gshared int i;

    if (rm == 0)
        return "0";
    if (rm == ALLREGS)
        return "ALLREGS";
    if (rm == BYTEREGS)
        return "BYTEREGS";
    if (rm == allregs)
        return "allregs";
    if (rm == XMMREGS)
        return "XMMREGS";
    char *p = str[i].ptr;
    if (++i == NUM)
        i = 0;
    *p = 0;
    for (size_t j = 0; j < 32; j++)
    {
        if (mask(cast(uint)j) & rm)
        {
            // Verify there is room for the name plus a possible '|'
            // before writing, not after the buffer has been overrun
            assert(strlen(p) + strlen(regstring[j]) + 1 <= SMAX);
            strcat(p,regstring[j]);
            rm &= ~mask(cast(uint)j);
            if (rm)
                strcat(p,"|");
        }
    }
    if (rm)
    {
        // Leftover bits have no name: print them in hex, bounded by the
        // space remaining in the buffer
        const size_t len = strlen(p);
        snprintf(p + len, SMAX + 1 - len, "x%02x", rm);
    }
    assert(strlen(p) <= SMAX);
    return strdup(p);
}

/*********************************
 * Scan down comma-expressions.
 * Params:
 *      cdb = code builder that receives the code generated for the
 *            left branches of the comma-expressions
 *      pe  = in: the expression to scan;
 *            out: *pe = first elem down right side that is not an OPcomma
 */

void docommas(ref CodeBuilder cdb,elem **pe)
{
    uint stackpushsave = stackpush;
    int stackcleansave = cgstate.stackclean;
    cgstate.stackclean = 0;
    elem* e = *pe;
    while (1)
    {
        if (configv.addlinenumbers && e.Esrcpos.Slinnum)
        {
            cdb.genlinnum(e.Esrcpos);
            //e.Esrcpos.Slinnum = 0;               // don't do it twice
        }
        if (e.Eoper != OPcomma)
            break;
        // Left operand: evaluated with no result registers requested
        regm_t retregs = 0;
        codelem(cdb,e.EV.E1,&retregs,true);
        elem* eold = e;
        e = e.EV.E2;
        freenode(eold);
    }
    *pe = e;
    assert(cgstate.stackclean == 0);
    cgstate.stackclean = stackcleansave;
    genstackclean(cdb,stackpush - stackpushsave,0);
}

/**************************
 * For elems in regcon that don't match regconsave,
 * clear the corresponding bit in regcon.cse.mval.
 * Do same for regcon.immed.
*/

void andregcon(con_t *pregconsave)
{
    // clearBit has every bit set except the one for register i
    regm_t clearBit = ~1;
    foreach (i; 0 .. REGMAX)
    {
        const bool cseDiffers   = pregconsave.cse.value[i]   != regcon.cse.value[i];
        const bool immedDiffers = pregconsave.immed.value[i] != regcon.immed.value[i];

        if (cseDiffers)
            regcon.cse.mval &= clearBit;
        if (immedDiffers)
            regcon.immed.mval &= clearBit;

        clearBit = (clearBit << 1) | 1;     // move the cleared bit up by one
    }
    //printf("regcon.cse.mval = %s, regconsave.mval = %s ",regm_str(regcon.cse.mval),regm_str(pregconsave.cse.mval));

    // Merge the saved state in: 'used' accumulates, the validity masks
    // keep only what holds in both states
    regcon.used       |= pregconsave.used;
    regcon.cse.mval   &= pregconsave.cse.mval;
    regcon.immed.mval &= pregconsave.immed.mval;
    regcon.params     &= pregconsave.params;
    //printf("regcon.cse.mval&regcon.cse.mops = %s, regcon.cse.mops = %s\n",regm_str(regcon.cse.mval & regcon.cse.mops), regm_str(regcon.cse.mops));

    // mops must stay a subset of mval
    regcon.cse.mops   &= regcon.cse.mval;
}

}