/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1985-1998 by Symantec
 *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cgcod.d, backend/cgcod.d)
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cgcod.d
 */

module dmd.backend.cgcod;

version = FRAMEPTR;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.cgcse;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.dlist;
import dmd.backend.dvec;
import dmd.backend.melf;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.outbuf;
import dmd.backend.rtlsym;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

import dmd.backend.barray;

version (SCPP)
{
    import parser;
    import precomp;
}

extern (C++):

nothrow:

alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

// Compile-time boolean mirror of the MARS version identifier, so it can be
// used inside `static if` expressions.
version (MARS)
    enum MARS = true;
else
    enum MARS = false;

void dwarf_except_gentables(Funcsym *sfunc, uint startoffset, uint retoffset);
int REGSIZE();

/// Returns: a mask with only bit number `m` set.
private extern (D) uint mask(uint m) { return 1 << m; }


__gshared
{
bool floatreg;                  // !=0 if floating register is required

int hasframe;                   // !=0 if this function has a stack frame
bool enforcealign;              // enforced stack alignment
targ_size_t spoff;
targ_size_t Foff;               // BP offset of floating register
targ_size_t CSoff;              // offset of common sub expressions
targ_size_t NDPoff;             // offset of saved 8087 registers
targ_size_t pushoff;            // offset of saved registers
bool pushoffuse;                // using pushoff
int BPoff;                      // offset from BP
int EBPtoESP;                   // add to EBP offset to get ESP offset
LocalSection Para;              // section of function parameters
LocalSection Auto;              // section of automatics and registers
LocalSection Fast;              // section of fastpar
LocalSection EEStack;           // offset of SCstack variables from ESP
LocalSection Alloca;            // data for alloca() temporary

REGSAVE regsave;

CGstate cgstate;                // state of code generator

regm_t BYTEREGS = BYTEREGS_INIT;
regm_t ALLREGS  = ALLREGS_INIT;


/************************************
 * # of bytes that SP is beyond BP.
 */

uint stackpush;

int stackchanged;               /* set to !=0 if any use of the stack
                                   other than accessing parameters. Used
                                   to see if we can address parameters
                                   with ESP rather than EBP.
                                 */
int refparam;           // !=0 if we referenced any parameters
int reflocal;           // !=0 if we referenced any locals
bool anyiasm;           // !=0 if any inline assembler
char calledafunc;       // !=0 if we called a function
char needframe;         // if true, then we will need the frame
                        // pointer (BP for the 8088)
char gotref;            // !=0 if the GOTsym was referenced
uint usednteh;          // if !=0, then used NT exception handling
bool calledFinally;     // true if called a BC_finally block

/* Register contents    */
con_t regcon;

int pass;                       // PASSxxxx

private Symbol *retsym;         // set to symbol that should be placed in
                                // register AX

/****************************
 * Register masks.
136 */ 137 138 regm_t msavereg; // Mask of registers that we would like to save. 139 // they are temporaries (set by scodelem()) 140 regm_t mfuncreg; // Mask of registers preserved by a function 141 142 regm_t allregs; // ALLREGS optionally including mBP 143 144 int dfoidx; /* which block we are in */ 145 146 targ_size_t funcoffset; // offset of start of function 147 targ_size_t prolog_allocoffset; // offset past adj of stack allocation 148 targ_size_t startoffset; // size of function entry code 149 targ_size_t retoffset; /* offset from start of func to ret code */ 150 targ_size_t retsize; /* size of function return */ 151 152 private regm_t lastretregs,last2retregs,last3retregs,last4retregs,last5retregs; 153 154 } 155 156 /********************************* 157 * Generate code for a function. 158 * Note at the end of this routine mfuncreg will contain the mask 159 * of registers not affected by the function. Some minor optimization 160 * possibilities are here. 161 * Params: 162 * sfunc = function to generate code for 163 */ 164 165 void codgen(Symbol *sfunc) 166 { 167 bool flag; 168 block *btry; 169 170 // Register usage. If a bit is on, the corresponding register is live 171 // in that basic block. 172 173 //printf("codgen('%s')\n",funcsym_p.Sident.ptr); 174 assert(sfunc == funcsym_p); 175 assert(cseg == funcsym_p.Sseg); 176 177 cgreg_init(); 178 CSE.initialize(); 179 tym_t functy = tybasic(sfunc.ty()); 180 cod3_initregs(); 181 allregs = ALLREGS; 182 pass = PASSinitial; 183 Alloca.init(); 184 anyiasm = 0; 185 186 if (config.ehmethod == EHmethod.EH_DWARF) 187 { 188 /* The dwarf unwinder relies on the function epilog to exist 189 */ 190 for (block* b = startblock; b; b = b.Bnext) 191 { 192 if (b.BC == BCexit) 193 b.BC = BCret; 194 } 195 } 196 197 tryagain: 198 debug 199 if (debugr) 200 printf("------------------ PASS%s -----------------\n", 201 (pass == PASSinitial) ? "init".ptr : ((pass == PASSreg) ? 
"reg".ptr : "final".ptr)); 202 203 lastretregs = last2retregs = last3retregs = last4retregs = last5retregs = 0; 204 205 // if no parameters, assume we don't need a stack frame 206 needframe = 0; 207 enforcealign = false; 208 gotref = 0; 209 stackchanged = 0; 210 stackpush = 0; 211 refparam = 0; 212 calledafunc = 0; 213 retsym = null; 214 215 cgstate.stackclean = 1; 216 cgstate.funcarg.init(); 217 cgstate.funcargtos = ~0; 218 cgstate.accessedTLS = false; 219 STACKALIGN = TARGET_STACKALIGN; 220 221 regsave.reset(); 222 memset(global87.stack.ptr,0,global87.stack.sizeof); 223 224 calledFinally = false; 225 usednteh = 0; 226 227 static if (MARS && TARGET_WINDOS) 228 { 229 if (sfunc.Sfunc.Fflags3 & Fjmonitor) 230 usednteh |= NTEHjmonitor; 231 } 232 else version (SCPP) 233 { 234 if (CPP) 235 { 236 if (config.exe == EX_WIN32 && 237 (sfunc.Stype.Tflags & TFemptyexc || sfunc.Stype.Texcspec)) 238 usednteh |= NTEHexcspec; 239 except_reset(); 240 } 241 } 242 243 // Set on a trial basis, turning it off if anything might throw 244 sfunc.Sfunc.Fflags3 |= Fnothrow; 245 246 floatreg = false; 247 assert(global87.stackused == 0); /* nobody in 8087 stack */ 248 249 CSE.start(); 250 memset(®con,0,regcon.sizeof); 251 regcon.cse.mval = regcon.cse.mops = 0; // no common subs yet 252 msavereg = 0; 253 uint nretblocks = 0; 254 mfuncreg = fregsaved; // so we can see which are used 255 // (bit is cleared each time 256 // we use one) 257 for (block* b = startblock; b; b = b.Bnext) 258 { 259 memset(&b.Bregcon,0,b.Bregcon.sizeof); // Clear out values in registers 260 if (b.Belem) 261 resetEcomsub(b.Belem); // reset all the Ecomsubs 262 if (b.BC == BCasm) 263 anyiasm = 1; // we have inline assembler 264 if (b.BC == BCret || b.BC == BCretexp) 265 nretblocks++; 266 } 267 268 if (!config.fulltypes || (config.flags4 & CFG4optimized)) 269 { 270 regm_t noparams = 0; 271 for (int i = 0; i < globsym.top; i++) 272 { 273 Symbol *s = globsym.tab[i]; 274 s.Sflags &= ~SFLread; 275 switch (s.Sclass) 276 { 277 
case SCfastpar: 278 case SCshadowreg: 279 regcon.params |= s.Spregm(); 280 goto case SCparameter; 281 282 case SCparameter: 283 if (s.Sfl == FLreg) 284 noparams |= s.Sregm; 285 break; 286 287 default: 288 break; 289 } 290 } 291 regcon.params &= ~noparams; 292 } 293 294 if (config.flags4 & CFG4optimized) 295 { 296 if (nretblocks == 0 && // if no return blocks in function 297 !(sfunc.ty() & mTYnaked)) // naked functions may have hidden veys of returning 298 sfunc.Sflags |= SFLexit; // mark function as never returning 299 300 assert(dfo); 301 302 cgreg_reset(); 303 for (dfoidx = 0; dfoidx < dfo.length; dfoidx++) 304 { 305 regcon.used = msavereg | regcon.cse.mval; // registers already in use 306 block* b = dfo[dfoidx]; 307 blcodgen(b); // gen code in depth-first order 308 //printf("b.Bregcon.used = %s\n", regm_str(b.Bregcon.used)); 309 cgreg_used(dfoidx, b.Bregcon.used); // gather register used information 310 } 311 } 312 else 313 { 314 pass = PASSfinal; 315 for (block* b = startblock; b; b = b.Bnext) 316 blcodgen(b); // generate the code for each block 317 } 318 regcon.immed.mval = 0; 319 assert(!regcon.cse.mops); // should have all been used 320 321 // See which variables we can put into registers 322 if (pass != PASSfinal && 323 !anyiasm) // possible LEA or LES opcodes 324 { 325 allregs |= cod3_useBP(); // see if we can use EBP 326 327 // If pic code, but EBX was never needed 328 if (!(allregs & mask(PICREG)) && !gotref) 329 { 330 allregs |= mask(PICREG); // EBX can now be used 331 cgreg_assign(retsym); 332 pass = PASSreg; 333 } 334 else if (cgreg_assign(retsym)) // if we found some registers 335 pass = PASSreg; 336 else 337 pass = PASSfinal; 338 for (block* b = startblock; b; b = b.Bnext) 339 { 340 code_free(b.Bcode); 341 b.Bcode = null; 342 } 343 goto tryagain; 344 } 345 cgreg_term(); 346 347 version (SCPP) 348 { 349 if (CPP) 350 cgcod_eh(); 351 } 352 353 // See if we need to enforce a particular stack alignment 354 foreach (i; 0 .. 
globsym.top) 355 { 356 Symbol *s = globsym.tab[i]; 357 358 if (Symbol_Sisdead(s, anyiasm)) 359 continue; 360 361 switch (s.Sclass) 362 { 363 case SCregister: 364 case SCauto: 365 case SCfastpar: 366 if (s.Sfl == FLreg) 367 break; 368 369 const sz = type_alignsize(s.Stype); 370 if (sz > STACKALIGN && (I64 || config.exe == EX_OSX)) 371 { 372 STACKALIGN = sz; 373 enforcealign = true; 374 } 375 break; 376 377 default: 378 break; 379 } 380 } 381 382 stackoffsets(1); // compute addresses of stack variables 383 cod5_prol_epi(); // see where to place prolog/epilog 384 CSE.finish(); // compute addresses and sizes of CSE saves 385 386 if (configv.addlinenumbers) 387 objmod.linnum(sfunc.Sfunc.Fstartline,sfunc.Sseg,Offset(sfunc.Sseg)); 388 389 // Otherwise, jmp's to startblock will execute the prolog again 390 assert(!startblock.Bpred); 391 392 CodeBuilder cdbprolog; cdbprolog.ctor(); 393 prolog(cdbprolog); // gen function start code 394 code *cprolog = cdbprolog.finish(); 395 if (cprolog) 396 pinholeopt(cprolog,null); // optimize 397 398 funcoffset = Offset(sfunc.Sseg); 399 targ_size_t coffset = Offset(sfunc.Sseg); 400 401 if (eecontext.EEelem) 402 genEEcode(); 403 404 for (block* b = startblock; b; b = b.Bnext) 405 { 406 // We couldn't do this before because localsize was unknown 407 switch (b.BC) 408 { 409 case BCret: 410 if (configv.addlinenumbers && b.Bsrcpos.Slinnum && !(sfunc.ty() & mTYnaked)) 411 { 412 CodeBuilder cdb; cdb.ctor(); 413 cdb.append(b.Bcode); 414 cdb.genlinnum(b.Bsrcpos); 415 b.Bcode = cdb.finish(); 416 } 417 goto case BCretexp; 418 419 case BCretexp: 420 epilog(b); 421 break; 422 423 default: 424 if (b.Bflags & BFLepilog) 425 epilog(b); 426 break; 427 } 428 assignaddr(b); // assign addresses 429 pinholeopt(b.Bcode,b); // do pinhole optimization 430 if (b.Bflags & BFLprolog) // do function prolog 431 { 432 startoffset = coffset + calcblksize(cprolog) - funcoffset; 433 b.Bcode = cat(cprolog,b.Bcode); 434 } 435 cgsched_block(b); 436 b.Bsize = 
calcblksize(b.Bcode); // calculate block size 437 if (b.Balign) 438 { 439 targ_size_t u = b.Balign - 1; 440 coffset = (coffset + u) & ~u; 441 } 442 b.Boffset = coffset; /* offset of this block */ 443 coffset += b.Bsize; /* offset of following block */ 444 } 445 446 debug 447 debugw && printf("code addr complete\n"); 448 449 // Do jump optimization 450 do 451 { 452 flag = false; 453 for (block* b = startblock; b; b = b.Bnext) 454 { 455 if (b.Bflags & BFLjmpoptdone) /* if no more jmp opts for this blk */ 456 continue; 457 int i = branch(b,0); // see if jmp => jmp short 458 if (i) // if any bytes saved 459 { targ_size_t offset; 460 461 b.Bsize -= i; 462 offset = b.Boffset + b.Bsize; 463 for (block* bn = b.Bnext; bn; bn = bn.Bnext) 464 { 465 if (bn.Balign) 466 { targ_size_t u = bn.Balign - 1; 467 468 offset = (offset + u) & ~u; 469 } 470 bn.Boffset = offset; 471 offset += bn.Bsize; 472 } 473 coffset = offset; 474 flag = true; 475 } 476 } 477 if (!I16 && !(config.flags4 & CFG4optimized)) 478 break; // use the long conditional jmps 479 } while (flag); // loop till no more bytes saved 480 481 debug 482 debugw && printf("code jump optimization complete\n"); 483 484 version (MARS) 485 { 486 if (usednteh & NTEH_try) 487 { 488 // Do this before code is emitted because we patch some instructions 489 nteh_filltables(); 490 } 491 } 492 493 // Compute starting offset for switch tables 494 targ_size_t swoffset; 495 int jmpseg = -1; 496 if (config.flags & CFGromable) 497 { 498 jmpseg = 0; 499 swoffset = coffset; 500 } 501 502 // Emit the generated code 503 if (eecontext.EEcompile == 1) 504 { 505 codout(sfunc.Sseg,eecontext.EEcode); 506 code_free(eecontext.EEcode); 507 version (SCPP) 508 { 509 el_free(eecontext.EEelem); 510 } 511 } 512 else 513 { 514 for (block* b = startblock; b; b = b.Bnext) 515 { 516 if (b.BC == BCjmptab || b.BC == BCswitch) 517 { 518 if (jmpseg == -1) 519 { 520 jmpseg = objmod.jmpTableSegment(sfunc); 521 swoffset = Offset(jmpseg); 522 } 523 swoffset = 
_align(0,swoffset); 524 b.Btableoffset = swoffset; /* offset of sw tab */ 525 swoffset += b.Btablesize; 526 } 527 jmpaddr(b.Bcode); /* assign jump addresses */ 528 529 debug 530 if (debugc) 531 { 532 printf("Boffset = x%x, Bsize = x%x, Coffset = x%x\n", 533 cast(int)b.Boffset,cast(int)b.Bsize,cast(int)Offset(sfunc.Sseg)); 534 if (b.Bcode) 535 printf( "First opcode of block is: %0x\n", b.Bcode.Iop ); 536 } 537 538 if (b.Balign) 539 { uint u = b.Balign; 540 uint nalign = (u - cast(uint)Offset(sfunc.Sseg)) & (u - 1); 541 542 cod3_align_bytes(sfunc.Sseg, nalign); 543 } 544 assert(b.Boffset == Offset(sfunc.Sseg)); 545 546 version (SCPP) 547 { 548 if (CPP && !(config.exe == EX_WIN32)) 549 { 550 //printf("b = %p, index = %d\n",b,b.Bindex); 551 //except_index_set(b.Bindex); 552 553 if (btry != b.Btry) 554 { 555 btry = b.Btry; 556 except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset); 557 } 558 if (b.BC == BCtry) 559 { 560 btry = b; 561 except_pair_setoffset(b,Offset(sfunc.Sseg) - funcoffset); 562 } 563 } 564 } 565 566 codout(sfunc.Sseg,b.Bcode); // output code 567 } 568 if (coffset != Offset(sfunc.Sseg)) 569 { 570 debug 571 printf("coffset = %d, Offset(sfunc.Sseg) = %d\n",cast(int)coffset,cast(int)Offset(sfunc.Sseg)); 572 573 assert(0); 574 } 575 sfunc.Ssize = Offset(sfunc.Sseg) - funcoffset; // size of function 576 577 static if (NTEXCEPTIONS || MARS) 578 { 579 version (MARS) 580 const nteh = usednteh & NTEH_try; 581 else static if (NTEXCEPTIONS) 582 const nteh = usednteh & NTEHcpp; 583 else 584 enum nteh = true; 585 if (nteh) 586 { 587 assert(!(config.flags & CFGromable)); 588 //printf("framehandleroffset = x%x, coffset = x%x\n",framehandleroffset,coffset); 589 objmod.reftocodeseg(sfunc.Sseg,framehandleroffset,coffset); 590 } 591 } 592 593 // Write out switch tables 594 flag = false; // true if last active block was a ret 595 for (block* b = startblock; b; b = b.Bnext) 596 { 597 switch (b.BC) 598 { 599 case BCjmptab: /* if jump table */ 600 outjmptab(b); /* write out 
jump table */ 601 goto Ldefault; 602 603 case BCswitch: 604 outswitab(b); /* write out switch table */ 605 goto Ldefault; 606 607 case BCret: 608 case BCretexp: 609 /* Compute offset to return code from start of function */ 610 retoffset = b.Boffset + b.Bsize - retsize - funcoffset; 611 version (MARS) 612 { 613 /* Add 3 bytes to retoffset in case we have an exception 614 * handler. THIS PROBABLY NEEDS TO BE IN ANOTHER SPOT BUT 615 * IT FIXES THE PROBLEM HERE AS WELL. 616 */ 617 if (usednteh & NTEH_try) 618 retoffset += 3; 619 } 620 flag = true; 621 break; 622 623 default: 624 Ldefault: 625 retoffset = b.Boffset + b.Bsize - funcoffset; 626 break; 627 } 628 } 629 if (configv.addlinenumbers && !(sfunc.ty() & mTYnaked)) 630 /* put line number at end of function on the 631 start of the last instruction 632 */ 633 /* Instead, try offset to cleanup code */ 634 if (retoffset < sfunc.Ssize) 635 objmod.linnum(sfunc.Sfunc.Fendline,sfunc.Sseg,funcoffset + retoffset); 636 637 static if (TARGET_WINDOS && MARS) 638 { 639 if (config.exe == EX_WIN64) 640 win64_pdata(sfunc); 641 } 642 643 static if (MARS) 644 { 645 if (usednteh & NTEH_try) 646 { 647 // Do this before code is emitted because we patch some instructions 648 nteh_gentables(sfunc); 649 } 650 if (usednteh & EHtry && // saw BCtry or BC_try (test EHcleanup too?) 
651 config.ehmethod == EHmethod.EH_DM) 652 { 653 except_gentables(); 654 } 655 if (config.ehmethod == EHmethod.EH_DWARF) 656 { 657 sfunc.Sfunc.Fstartblock = startblock; 658 dwarf_except_gentables(sfunc, cast(uint)startoffset, cast(uint)retoffset); 659 sfunc.Sfunc.Fstartblock = null; 660 } 661 } 662 663 version (SCPP) 664 { 665 // Write out frame handler 666 if (NTEXCEPTIONS && usednteh & NTEHcpp) 667 { 668 nteh_framehandler(sfunc, except_gentables()); 669 } 670 else 671 { 672 if (NTEXCEPTIONS && usednteh & NTEH_try) 673 { 674 nteh_gentables(sfunc); 675 } 676 else 677 { 678 if (CPP) 679 except_gentables(); 680 } 681 } 682 } 683 684 for (block* b = startblock; b; b = b.Bnext) 685 { 686 code_free(b.Bcode); 687 b.Bcode = null; 688 } 689 } 690 691 // Mask of regs saved 692 // BUG: do interrupt functions save BP? 693 sfunc.Sregsaved = (functy == TYifunc) ? cast(regm_t) mBP : (mfuncreg | fregsaved); 694 695 debug 696 if (global87.stackused != 0) 697 printf("stackused = %d\n",global87.stackused); 698 699 assert(global87.stackused == 0); /* nobody in 8087 stack */ 700 701 global87.save.__dtor(); // clean up ndp save array 702 } 703 704 /********************************************* 705 * Align sections on the stack. 706 * base negative offset of section from frame pointer 707 * alignment alignment to use 708 * bias difference between where frame pointer points and the STACKALIGNed 709 * part of the stack 710 * Returns: 711 * base revised downward so it is aligned 712 */ 713 targ_size_t alignsection(targ_size_t base, uint alignment, int bias) 714 { 715 assert(cast(int)base <= 0); 716 if (alignment > STACKALIGN) 717 alignment = STACKALIGN; 718 if (alignment) 719 { 720 int sz = cast(int)(-base + bias); 721 assert(sz >= 0); 722 sz &= (alignment - 1); 723 if (sz) 724 base -= alignment - sz; 725 } 726 return base; 727 } 728 729 /******************************* 730 * Generate code for a function start. 
731 * Input: 732 * Offset(cseg) address of start of code 733 * Auto.alignment 734 * Output: 735 * Offset(cseg) adjusted for size of code generated 736 * EBPtoESP 737 * hasframe 738 * BPoff 739 */ 740 void prolog(ref CodeBuilder cdb) 741 { 742 bool enter; 743 744 //printf("cod3.prolog() %s, needframe = %d, Auto.alignment = %d\n", funcsym_p.Sident, needframe, Auto.alignment); 745 debug debugw && printf("funcstart()\n"); 746 regcon.immed.mval = 0; /* no values in registers yet */ 747 version (FRAMEPTR) 748 EBPtoESP = 0; 749 else 750 EBPtoESP = -REGSIZE; 751 hasframe = 0; 752 bool pushds = false; 753 BPoff = 0; 754 bool pushalloc = false; 755 tym_t tyf = funcsym_p.ty(); 756 tym_t tym = tybasic(tyf); 757 uint farfunc = tyfarfunc(tym); 758 759 // Special Intel 64 bit ABI prolog setup for variadic functions 760 Symbol *sv64 = null; // set to __va_argsave 761 if (I64 && variadic(funcsym_p.Stype)) 762 { 763 /* The Intel 64 bit ABI scheme. 764 * abi_sysV_amd64.pdf 765 * Load arguments passed in registers into the varargs save area 766 * so they can be accessed by va_arg(). 
767 */ 768 /* Look for __va_argsave 769 */ 770 for (SYMIDX si = 0; si < globsym.top; si++) 771 { 772 Symbol *s = globsym.tab[si]; 773 if (s.Sident[0] == '_' && strcmp(s.Sident.ptr, "__va_argsave") == 0) 774 { 775 if (!(s.Sflags & SFLdead)) 776 sv64 = s; 777 break; 778 } 779 } 780 } 781 782 if (config.flags & CFGalwaysframe || 783 funcsym_p.Sfunc.Fflags3 & Ffakeeh || 784 /* The exception stack unwinding mechanism relies on the EBP chain being intact, 785 * so need frame if function can possibly throw 786 */ 787 !(config.exe == EX_WIN32) && !(funcsym_p.Sfunc.Fflags3 & Fnothrow) || 788 cgstate.accessedTLS || 789 sv64 790 ) 791 needframe = 1; 792 793 CodeBuilder cdbx; cdbx.ctor(); 794 795 Lagain: 796 spoff = 0; 797 char guessneedframe = needframe; 798 int cfa_offset = 0; 799 // if (needframe && config.exe & (EX_LINUX | EX_FREEBSD | EX_SOLARIS) && !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru))) 800 // usednteh |= NTEHpassthru; 801 802 /* Compute BP offsets for variables on stack. 803 * The organization is: 804 * Para.size parameters 805 * -------- stack is aligned to STACKALIGN 806 * seg of return addr (if far function) 807 * IP of return addr 808 * BP. caller's BP 809 * DS (if Windows prolog/epilog) 810 * exception handling context symbol 811 * Fast.size fastpar 812 * Auto.size autos and regs 813 * regsave.off any saved registers 814 * Foff floating register 815 * Alloca.size alloca temporary 816 * CSoff common subs 817 * NDPoff any 8087 saved registers 818 * monitor context record 819 * any saved registers 820 */ 821 822 if (tym == TYifunc) 823 Para.size = 26; // how is this number derived? 824 else 825 { 826 version (FRAMEPTR) 827 { 828 Para.size = ((farfunc ? 2 : 1) + needframe) * REGSIZE; 829 if (needframe) 830 EBPtoESP = -REGSIZE; 831 } 832 else 833 Para.size = ((farfunc ? 
2 : 1) + 1) * REGSIZE; 834 } 835 836 /* The real reason for the FAST section is because the implementation of contracts 837 * requires a consistent stack frame location for the 'this' pointer. But if varying 838 * stuff in Auto.offset causes different alignment for that section, the entire block can 839 * shift around, causing a crash in the contracts. 840 * Fortunately, the 'this' is always an SCfastpar, so we put the fastpar's in their 841 * own FAST section, which is never aligned at a size bigger than REGSIZE, and so 842 * its alignment never shifts around. 843 * But more work needs to be done, see Bugzilla 9200. Really, each section should be aligned 844 * individually rather than as a group. 845 */ 846 Fast.size = 0; 847 static if (NTEXCEPTIONS == 2) 848 { 849 Fast.size -= nteh_contextsym_size(); 850 version (MARS) 851 { 852 static if (TARGET_WINDOS) 853 { 854 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh && nteh_contextsym_size() == 0) 855 Fast.size -= 5 * 4; 856 } 857 } 858 } 859 860 /* Despite what the comment above says, aligning Fast section to size greater 861 * than REGSIZE does not break contract implementation. Fast.offset and 862 * Fast.alignment must be the same for the overriding and 863 * the overridden function, since they have the same parameters. Fast.size 864 * must be the same because otherwise, contract inheritance wouldn't work 865 * even if we didn't align Fast section to size greater than REGSIZE. Therefore, 866 * the only way aligning the section could cause problems with contract 867 * inheritance is if bias (declared below) differed for the overridden 868 * and the overriding function. 869 * 870 * Bias depends on Para.size and needframe. The value of Para.size depends on 871 * whether the function is an interrupt handler and whether it is a farfunc. 872 * DMD does not have _interrupt attribute and D does not make a distinction 873 * between near and far functions, so Para.size should always be 2 * REGSIZE 874 * for D. 
875 * 876 * The value of needframe depends on a global setting that is only set 877 * during backend's initialization and on function flag Ffakeeh. On Windows, 878 * that flag is always set for virtual functions, for which contracts are 879 * defined and on other platforms, it is never set. Because of that 880 * the value of neadframe should always be the same for the overridden 881 * and the overriding function, and so bias should be the same too. 882 */ 883 884 version (FRAMEPTR) 885 int bias = enforcealign ? 0 : cast(int)(Para.size); 886 else 887 int bias = enforcealign ? 0 : cast(int)(Para.size + (needframe ? 0 : REGSIZE)); 888 889 if (Fast.alignment < REGSIZE) 890 Fast.alignment = REGSIZE; 891 892 Fast.size = alignsection(Fast.size - Fast.offset, Fast.alignment, bias); 893 894 if (Auto.alignment < REGSIZE) 895 Auto.alignment = REGSIZE; // necessary because localsize must be REGSIZE aligned 896 Auto.size = alignsection(Fast.size - Auto.offset, Auto.alignment, bias); 897 898 regsave.off = alignsection(Auto.size - regsave.top, regsave.alignment, bias); 899 //printf("regsave.off = x%x, size = x%x, alignment = %x\n", 900 //cast(int)regsave.off, cast(int)(regsave.top), cast(int)regsave.alignment); 901 902 if (floatreg) 903 { 904 uint floatregsize = config.fpxmmregs || I32 ? 
16 : DOUBLESIZE; 905 Foff = alignsection(regsave.off - floatregsize, STACKALIGN, bias); 906 //printf("Foff = x%x, size = x%x\n", cast(int)Foff, cast(int)floatregsize); 907 } 908 else 909 Foff = regsave.off; 910 911 Alloca.alignment = REGSIZE; 912 Alloca.offset = alignsection(Foff - Alloca.size, Alloca.alignment, bias); 913 914 CSoff = alignsection(Alloca.offset - CSE.size(), CSE.alignment(), bias); 915 //printf("CSoff = x%x, size = x%x, alignment = %x\n", 916 //cast(int)CSoff, CSE.size(), cast(int)CSE.alignment); 917 918 NDPoff = alignsection(CSoff - global87.save.length * tysize(TYldouble), REGSIZE, bias); 919 920 regm_t topush = fregsaved & ~mfuncreg; // mask of registers that need saving 921 pushoffuse = false; 922 pushoff = NDPoff; 923 /* We don't keep track of all the pushes and pops in a function. Hence, 924 * using POP REG to restore registers in the epilog doesn't work, because the Dwarf unwinder 925 * won't be setting ESP correctly. With pushoffuse, the registers are restored 926 * from EBP, which is kept track of properly. 927 */ 928 if ((config.flags4 & CFG4speed || config.ehmethod == EHmethod.EH_DWARF) && (I32 || I64)) 929 { 930 /* Instead of pushing the registers onto the stack one by one, 931 * allocate space in the stack frame and copy/restore them there. 932 */ 933 int xmmtopush = numbitsset(topush & XMMREGS); // XMM regs take 16 bytes 934 int gptopush = numbitsset(topush) - xmmtopush; // general purpose registers to save 935 if (NDPoff || xmmtopush || cgstate.funcarg.size) 936 { 937 pushoff = alignsection(pushoff - (gptopush * REGSIZE + xmmtopush * 16), 938 xmmtopush ? STACKALIGN : REGSIZE, bias); 939 pushoffuse = true; // tell others we're using this strategy 940 } 941 } 942 943 //printf("Fast.size = x%x, Auto.size = x%x\n", (int)Fast.size, (int)Auto.size); 944 945 cgstate.funcarg.alignment = cgstate.funcarg.size ? 
STACKALIGN : REGSIZE; 946 cgstate.funcarg.offset = alignsection(pushoff - cgstate.funcarg.size, cgstate.funcarg.alignment, bias); 947 948 localsize = -cgstate.funcarg.offset; 949 950 //printf("Alloca.offset = x%llx, cstop = x%llx, CSoff = x%llx, NDPoff = x%llx, localsize = x%llx\n", 951 //(long long)Alloca.offset, (long long)CSE.size(), (long long)CSoff, (long long)NDPoff, (long long)localsize); 952 assert(cast(targ_ptrdiff_t)localsize >= 0); 953 954 // Keep the stack aligned by 8 for any subsequent function calls 955 if (!I16 && calledafunc && 956 (STACKALIGN >= 16 || config.flags4 & CFG4stackalign)) 957 { 958 int npush = numbitsset(topush); // number of registers that need saving 959 npush += numbitsset(topush & XMMREGS); // XMM regs take 16 bytes, so count them twice 960 if (pushoffuse) 961 npush = 0; 962 963 //printf("npush = %d Para.size = x%x needframe = %d localsize = x%x\n", 964 //npush, Para.size, needframe, localsize); 965 966 int sz = cast(int)(localsize + npush * REGSIZE); 967 if (!enforcealign) 968 { 969 version (FRAMEPTR) 970 sz += Para.size; 971 else 972 sz += Para.size + (needframe ? 
0 : -REGSIZE); 973 } 974 if (sz & (STACKALIGN - 1)) 975 localsize += STACKALIGN - (sz & (STACKALIGN - 1)); 976 } 977 cgstate.funcarg.offset = -localsize; 978 979 //printf("Foff x%02x Auto.size x%02x NDPoff x%02x CSoff x%02x Para.size x%02x localsize x%02x\n", 980 //(int)Foff,(int)Auto.size,(int)NDPoff,(int)CSoff,(int)Para.size,(int)localsize); 981 982 uint xlocalsize = cast(uint)localsize; // amount to subtract from ESP to make room for locals 983 984 if (tyf & mTYnaked) // if no prolog/epilog for function 985 { 986 hasframe = 1; 987 return; 988 } 989 990 if (tym == TYifunc) 991 { 992 prolog_ifunc(cdbx,&tyf); 993 hasframe = 1; 994 cdb.append(cdbx); 995 goto Lcont; 996 } 997 998 /* Determine if we need BP set up */ 999 if (enforcealign) 1000 { 1001 // we need BP to reset the stack before return 1002 // otherwise the return address is lost 1003 needframe = 1; 1004 1005 } 1006 else if (config.flags & CFGalwaysframe) 1007 needframe = 1; 1008 else 1009 { 1010 if (localsize) 1011 { 1012 if (I16 || 1013 !(config.flags4 & CFG4speed) || 1014 config.target_cpu < TARGET_Pentium || 1015 farfunc || 1016 config.flags & CFGstack || 1017 xlocalsize >= 0x1000 || 1018 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) || 1019 anyiasm || 1020 Alloca.size 1021 ) 1022 needframe = 1; 1023 } 1024 if (refparam && (anyiasm || I16)) 1025 needframe = 1; 1026 } 1027 1028 if (needframe) 1029 { 1030 assert(mfuncreg & mBP); // shouldn't have used mBP 1031 1032 if (!guessneedframe) // if guessed wrong 1033 goto Lagain; 1034 } 1035 1036 if (I16 && config.wflags & WFwindows && farfunc) 1037 { 1038 prolog_16bit_windows_farfunc(cdbx, &tyf, &pushds); 1039 enter = false; // don't use ENTER instruction 1040 hasframe = 1; // we have a stack frame 1041 } 1042 else if (needframe) // if variables or parameters 1043 { 1044 prolog_frame(cdbx, farfunc, &xlocalsize, &enter, &cfa_offset); 1045 hasframe = 1; 1046 } 1047 1048 /* Align the stack if necessary */ 1049 
prolog_stackalign(cdbx); 1050 1051 /* Subtract from stack pointer the size of the local stack frame 1052 */ 1053 if (config.flags & CFGstack) // if stack overflow check 1054 { 1055 prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc); 1056 if (Alloca.size) 1057 prolog_setupalloca(cdbx); 1058 } 1059 else if (needframe) /* if variables or parameters */ 1060 { 1061 if (xlocalsize) /* if any stack offset */ 1062 { 1063 prolog_frameadj(cdbx, tyf, xlocalsize, enter, &pushalloc); 1064 if (Alloca.size) 1065 prolog_setupalloca(cdbx); 1066 } 1067 else 1068 assert(Alloca.size == 0); 1069 } 1070 else if (xlocalsize) 1071 { 1072 assert(I32 || I64); 1073 prolog_frameadj2(cdbx, tyf, xlocalsize, &pushalloc); 1074 version (FRAMEPTR) { } else 1075 BPoff += REGSIZE; 1076 } 1077 else 1078 assert((localsize | Alloca.size) == 0 || (usednteh & NTEHjmonitor)); 1079 EBPtoESP += xlocalsize; 1080 if (hasframe) 1081 EBPtoESP += REGSIZE; 1082 1083 /* Win64 unwind needs the amount of code generated so far 1084 */ 1085 if (config.exe == EX_WIN64) 1086 { 1087 code *c = cdbx.peek(); 1088 pinholeopt(c, null); 1089 prolog_allocoffset = calcblksize(c); 1090 } 1091 1092 version (SCPP) 1093 { 1094 /* The idea is to generate trace for all functions if -Nc is not thrown. 1095 * If -Nc is thrown, generate trace only for global COMDATs, because those 1096 * are relevant to the FUNCTIONS statement in the linker .DEF file. 1097 * This same logic should be in epilog(). 1098 */ 1099 if (config.flags & CFGtrace && 1100 (!(config.flags4 & CFG4allcomdat) || 1101 funcsym_p.Sclass == SCcomdat || 1102 funcsym_p.Sclass == SCglobal || 1103 (config.flags2 & CFG2comdat && SymInline(funcsym_p)) 1104 ) 1105 ) 1106 { 1107 uint spalign = 0; 1108 int sz = cast(int)localsize; 1109 if (!enforcealign) 1110 { 1111 version (FRAMEPTR) 1112 sz += Para.size; 1113 else 1114 sz += Para.size + (needframe ? 
0 : -REGSIZE); 1115 } 1116 if (STACKALIGN >= 16 && (sz & (STACKALIGN - 1))) 1117 spalign = STACKALIGN - (sz & (STACKALIGN - 1)); 1118 1119 if (spalign) 1120 { /* This could be avoided by moving the function call to after the 1121 * registers are saved. But I don't remember why the call is here 1122 * and not there. 1123 */ 1124 cod3_stackadj(cdbx, spalign); 1125 } 1126 1127 uint regsaved; 1128 prolog_trace(cdbx, farfunc != 0, ®saved); 1129 1130 if (spalign) 1131 cod3_stackadj(cdbx, -spalign); 1132 useregs((ALLREGS | mBP | mES) & ~regsaved); 1133 } 1134 } 1135 1136 version (MARS) 1137 { 1138 if (usednteh & NTEHjmonitor) 1139 { Symbol *sthis; 1140 1141 for (SYMIDX si = 0; 1; si++) 1142 { assert(si < globsym.top); 1143 sthis = globsym.tab[si]; 1144 if (strcmp(sthis.Sident.ptr,"this".ptr) == 0) 1145 break; 1146 } 1147 nteh_monitor_prolog(cdbx,sthis); 1148 EBPtoESP += 3 * 4; 1149 } 1150 } 1151 1152 cdb.append(cdbx); 1153 prolog_saveregs(cdb, topush, cfa_offset); 1154 1155 Lcont: 1156 1157 if (config.exe == EX_WIN64) 1158 { 1159 if (variadic(funcsym_p.Stype)) 1160 prolog_gen_win64_varargs(cdb); 1161 regm_t namedargs; 1162 prolog_loadparams(cdb, tyf, pushalloc, namedargs); 1163 return; 1164 } 1165 1166 prolog_ifunc2(cdb, tyf, tym, pushds); 1167 1168 static if (NTEXCEPTIONS == 2) 1169 { 1170 if (usednteh & NTEH_except) 1171 nteh_setsp(cdb, 0x89); // MOV __context[EBP].esp,ESP 1172 } 1173 1174 // Load register parameters off of the stack. Do not use 1175 // assignaddr(), as it will replace the stack reference with 1176 // the register! 1177 regm_t namedargs; 1178 prolog_loadparams(cdb, tyf, pushalloc, namedargs); 1179 1180 if (sv64) 1181 prolog_genvarargs(cdb, sv64, namedargs); 1182 1183 /* Alignment checks 1184 */ 1185 //assert(Auto.alignment <= STACKALIGN); 1186 //assert(((Auto.size + Para.size + BPoff) & (Auto.alignment - 1)) == 0); 1187 } 1188 1189 /************************************ 1190 * Predicate for sorting auto symbols for qsort(). 
1191 * Returns: 1192 * < 0 s1 goes farther from frame pointer 1193 * > 0 s1 goes nearer the frame pointer 1194 * = 0 no difference 1195 */ 1196 1197 extern (C) int 1198 autosort_cmp(scope const void *ps1, scope const void *ps2) 1199 { 1200 Symbol *s1 = *cast(Symbol **)ps1; 1201 Symbol *s2 = *cast(Symbol **)ps2; 1202 1203 /* Largest align size goes furthest away from frame pointer, 1204 * so they get allocated first. 1205 */ 1206 uint alignsize1 = Symbol_Salignsize(s1); 1207 uint alignsize2 = Symbol_Salignsize(s2); 1208 if (alignsize1 < alignsize2) 1209 return 1; 1210 else if (alignsize1 > alignsize2) 1211 return -1; 1212 1213 /* move variables nearer the frame pointer that have higher Sweights 1214 * because addressing mode is fewer bytes. Grouping together high Sweight 1215 * variables also may put them in the same cache 1216 */ 1217 if (s1.Sweight < s2.Sweight) 1218 return -1; 1219 else if (s1.Sweight > s2.Sweight) 1220 return 1; 1221 1222 /* More: 1223 * 1. put static arrays nearest the frame pointer, so buffer overflows 1224 * can't change other variable contents 1225 * 2. Do the coloring at the byte level to minimize stack usage 1226 */ 1227 return 0; 1228 } 1229 1230 /****************************** 1231 * Compute offsets for remaining tmp, automatic and register variables 1232 * that did not make it into registers. 
1233 * Input: 1234 * flags 0: do estimate only 1235 * 1: final 1236 */ 1237 void stackoffsets(int flags) 1238 { 1239 //printf("stackoffsets() %s\n", funcsym_p.Sident); 1240 1241 Para.init(); // parameter offset 1242 Fast.init(); // SCfastpar offset 1243 Auto.init(); // automatic & register offset 1244 EEStack.init(); // for SCstack's 1245 1246 // Set if doing optimization of auto layout 1247 bool doAutoOpt = flags && config.flags4 & CFG4optimized; 1248 1249 // Put autos in another array so we can do optimizations on the stack layout 1250 Symbol*[10] autotmp; 1251 Symbol **autos = null; 1252 if (doAutoOpt) 1253 { 1254 if (globsym.top <= autotmp.length) 1255 autos = autotmp.ptr; 1256 else 1257 { autos = cast(Symbol **)malloc(globsym.top * (*autos).sizeof); 1258 assert(autos); 1259 } 1260 } 1261 size_t autosi = 0; // number used in autos[] 1262 1263 for (int si = 0; si < globsym.top; si++) 1264 { Symbol *s = globsym.tab[si]; 1265 1266 /* Don't allocate space for dead or zero size parameters 1267 */ 1268 switch (s.Sclass) 1269 { 1270 case SCfastpar: 1271 if (!(funcsym_p.Sfunc.Fflags3 & Ffakeeh)) 1272 goto Ldefault; // don't need consistent stack frame 1273 break; 1274 1275 case SCparameter: 1276 if (type_zeroSize(s.Stype, tybasic(funcsym_p.Stype.Tty))) 1277 { 1278 Para.offset = _align(REGSIZE,Para.offset); // align on word stack boundary 1279 s.Soffset = Para.offset; 1280 continue; 1281 } 1282 break; // allocate even if it's dead 1283 1284 case SCshadowreg: 1285 break; // allocate even if it's dead 1286 1287 default: 1288 Ldefault: 1289 if (Symbol_Sisdead(s, anyiasm)) 1290 continue; // don't allocate space 1291 break; 1292 } 1293 1294 targ_size_t sz = type_size(s.Stype); 1295 if (sz == 0) 1296 sz++; // can't handle 0 length structs 1297 1298 uint alignsize = Symbol_Salignsize(s); 1299 if (alignsize > STACKALIGN) 1300 alignsize = STACKALIGN; // no point if the stack is less aligned 1301 1302 //printf("symbol '%s', size = x%lx, alignsize = %d, read = 
%x\n",s.Sident,(long)sz, (int)alignsize, s.Sflags & SFLread); 1303 assert(cast(int)sz >= 0); 1304 1305 switch (s.Sclass) 1306 { 1307 case SCfastpar: 1308 /* Get these 1309 * right next to the stack frame pointer, EBP. 1310 * Needed so we can call nested contract functions 1311 * frequire and fensure. 1312 */ 1313 if (s.Sfl == FLreg) // if allocated in register 1314 continue; 1315 /* Needed because storing fastpar's on the stack in prolog() 1316 * does the entire register 1317 */ 1318 if (sz < REGSIZE) 1319 sz = REGSIZE; 1320 1321 Fast.offset = _align(sz,Fast.offset); 1322 s.Soffset = Fast.offset; 1323 Fast.offset += sz; 1324 //printf("fastpar '%s' sz = %d, fast offset = x%x, %p\n",s.Sident,(int)sz,(int)s.Soffset, s); 1325 1326 if (alignsize > Fast.alignment) 1327 Fast.alignment = alignsize; 1328 break; 1329 1330 case SCregister: 1331 case SCauto: 1332 if (s.Sfl == FLreg) // if allocated in register 1333 break; 1334 1335 if (doAutoOpt) 1336 { autos[autosi++] = s; // deal with later 1337 break; 1338 } 1339 1340 Auto.offset = _align(sz,Auto.offset); 1341 s.Soffset = Auto.offset; 1342 Auto.offset += sz; 1343 //printf("auto '%s' sz = %d, auto offset = x%lx\n",s.Sident,sz,(long)s.Soffset); 1344 1345 if (alignsize > Auto.alignment) 1346 Auto.alignment = alignsize; 1347 break; 1348 1349 case SCstack: 1350 EEStack.offset = _align(sz,EEStack.offset); 1351 s.Soffset = EEStack.offset; 1352 //printf("EEStack.offset = x%lx\n",(long)s.Soffset); 1353 EEStack.offset += sz; 1354 break; 1355 1356 case SCshadowreg: 1357 case SCparameter: 1358 if (config.exe == EX_WIN64) 1359 { 1360 assert((Para.offset & 7) == 0); 1361 s.Soffset = Para.offset; 1362 Para.offset += 8; 1363 break; 1364 } 1365 /* Alignment on OSX 32 is odd. reals are 16 byte aligned in general, 1366 * but are 4 byte aligned on the OSX 32 stack. 
1367 */ 1368 Para.offset = _align(REGSIZE,Para.offset); /* align on word stack boundary */ 1369 if (alignsize >= 16 && 1370 (I64 || (config.exe == EX_OSX && 1371 (tyaggregate(s.ty()) || tyvector(s.ty()))))) 1372 Para.offset = (Para.offset + (alignsize - 1)) & ~(alignsize - 1); 1373 s.Soffset = Para.offset; 1374 //printf("%s param offset = x%lx, alignsize = %d\n",s.Sident,(long)s.Soffset, (int)alignsize); 1375 Para.offset += (s.Sflags & SFLdouble) 1376 ? type_size(tstypes[TYdouble]) // float passed as double 1377 : type_size(s.Stype); 1378 break; 1379 1380 case SCpseudo: 1381 case SCstatic: 1382 case SCbprel: 1383 break; 1384 default: 1385 symbol_print(s); 1386 assert(0); 1387 } 1388 } 1389 1390 if (autosi) 1391 { 1392 qsort(autos, autosi, (Symbol *).sizeof, &autosort_cmp); 1393 1394 vec_t tbl = vec_calloc(autosi); 1395 1396 for (size_t si = 0; si < autosi; si++) 1397 { 1398 Symbol *s = autos[si]; 1399 1400 targ_size_t sz = type_size(s.Stype); 1401 if (sz == 0) 1402 sz++; // can't handle 0 length structs 1403 1404 uint alignsize = Symbol_Salignsize(s); 1405 if (alignsize > STACKALIGN) 1406 alignsize = STACKALIGN; // no point if the stack is less aligned 1407 1408 /* See if we can share storage with another variable 1409 * if their live ranges do not overlap. 
1410 */ 1411 if (// Don't share because could stomp on variables 1412 // used in finally blocks 1413 !(usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) && 1414 s.Srange && !(s.Sflags & SFLspill)) 1415 { 1416 for (size_t i = 0; i < si; i++) 1417 { 1418 if (!vec_testbit(i,tbl)) 1419 continue; 1420 Symbol *sp = autos[i]; 1421 //printf("auto s = '%s', sp = '%s', %d, %d, %d\n",s.Sident,sp.Sident,dfo.length,vec_numbits(s.Srange),vec_numbits(sp.Srange)); 1422 if (vec_disjoint(s.Srange,sp.Srange) && 1423 !(sp.Soffset & (alignsize - 1)) && 1424 sz <= type_size(sp.Stype)) 1425 { 1426 vec_or(sp.Srange,sp.Srange,s.Srange); 1427 //printf("sharing space - '%s' onto '%s'\n",s.Sident,sp.Sident); 1428 s.Soffset = sp.Soffset; 1429 goto L2; 1430 } 1431 } 1432 } 1433 Auto.offset = _align(sz,Auto.offset); 1434 s.Soffset = Auto.offset; 1435 //printf("auto '%s' sz = %d, auto offset = x%lx\n",s.Sident,sz,(long)s.Soffset); 1436 Auto.offset += sz; 1437 if (s.Srange && !(s.Sflags & SFLspill)) 1438 vec_setbit(si,tbl); 1439 1440 if (alignsize > Auto.alignment) 1441 Auto.alignment = alignsize; 1442 L2: { } 1443 } 1444 1445 vec_free(tbl); 1446 1447 if (autos != autotmp.ptr) 1448 free(autos); 1449 } 1450 } 1451 1452 /**************************** 1453 * Generate code for a block. 1454 */ 1455 1456 private void blcodgen(block *bl) 1457 { 1458 regm_t mfuncregsave = mfuncreg; 1459 1460 //dbg_printf("blcodgen(%p)\n",bl); 1461 1462 /* Determine existing immediate values in registers by ANDing 1463 together the values from all the predecessors of b. 
1464 */ 1465 assert(bl.Bregcon.immed.mval == 0); 1466 regcon.immed.mval = 0; // assume no previous contents in registers 1467 // regcon.cse.mval = 0; 1468 foreach (bpl; ListRange(bl.Bpred)) 1469 { 1470 block *bp = list_block(bpl); 1471 1472 if (bpl == bl.Bpred) 1473 { regcon.immed = bp.Bregcon.immed; 1474 regcon.params = bp.Bregcon.params; 1475 // regcon.cse = bp.Bregcon.cse; 1476 } 1477 else 1478 { 1479 int i; 1480 1481 regcon.params &= bp.Bregcon.params; 1482 if ((regcon.immed.mval &= bp.Bregcon.immed.mval) != 0) 1483 // Actual values must match, too 1484 for (i = 0; i < REGMAX; i++) 1485 { 1486 if (regcon.immed.value[i] != bp.Bregcon.immed.value[i]) 1487 regcon.immed.mval &= ~mask(i); 1488 } 1489 } 1490 } 1491 regcon.cse.mops &= regcon.cse.mval; 1492 1493 // Set regcon.mvar according to what variables are in registers for this block 1494 CodeBuilder cdb; cdb.ctor(); 1495 regcon.mvar = 0; 1496 regcon.mpvar = 0; 1497 regcon.indexregs = 1; 1498 int anyspill = 0; 1499 char *sflsave = null; 1500 if (config.flags4 & CFG4optimized) 1501 { 1502 CodeBuilder cdbload; cdbload.ctor(); 1503 CodeBuilder cdbstore; cdbstore.ctor(); 1504 1505 sflsave = cast(char *) alloca(globsym.top * char.sizeof); 1506 for (SYMIDX i = 0; i < globsym.top; i++) 1507 { 1508 Symbol *s = globsym.tab[i]; 1509 1510 sflsave[i] = s.Sfl; 1511 if (regParamInPreg(s) && 1512 regcon.params & s.Spregm() && 1513 vec_testbit(dfoidx,s.Srange)) 1514 { 1515 // regcon.used |= s.Spregm(); 1516 } 1517 1518 if (s.Sfl == FLreg) 1519 { 1520 if (vec_testbit(dfoidx,s.Srange)) 1521 { 1522 regcon.mvar |= s.Sregm; 1523 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 1524 regcon.mpvar |= s.Sregm; 1525 } 1526 } 1527 else if (s.Sflags & SFLspill) 1528 { 1529 if (vec_testbit(dfoidx,s.Srange)) 1530 { 1531 anyspill = i + 1; 1532 cgreg_spillreg_prolog(bl,s,cdbstore,cdbload); 1533 if (vec_testbit(dfoidx,s.Slvreg)) 1534 { 1535 s.Sfl = FLreg; 1536 regcon.mvar |= s.Sregm; 1537 regcon.cse.mval &= ~s.Sregm; 1538 regcon.immed.mval 
&= ~s.Sregm; 1539 regcon.params &= ~s.Sregm; 1540 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 1541 regcon.mpvar |= s.Sregm; 1542 } 1543 } 1544 } 1545 } 1546 if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops) 1547 { 1548 cse_save(cdb,regcon.cse.mops & ~regcon.cse.mval); 1549 } 1550 cdb.append(cdbstore); 1551 cdb.append(cdbload); 1552 mfuncreg &= ~regcon.mvar; // use these registers 1553 regcon.used |= regcon.mvar; 1554 1555 // Determine if we have more than 1 uncommitted index register 1556 regcon.indexregs = IDXREGS & ~regcon.mvar; 1557 regcon.indexregs &= regcon.indexregs - 1; 1558 } 1559 1560 /* This doesn't work when calling the BC_finally function, 1561 * as it is one block calling another. 1562 */ 1563 //regsave.idx = 0; 1564 1565 reflocal = 0; 1566 int refparamsave = refparam; 1567 refparam = 0; 1568 assert((regcon.cse.mops & regcon.cse.mval) == regcon.cse.mops); 1569 1570 outblkexitcode(cdb, bl, anyspill, sflsave, &retsym, mfuncregsave); 1571 bl.Bcode = cdb.finish(); 1572 1573 for (int i = 0; i < anyspill; i++) 1574 { 1575 Symbol *s = globsym.tab[i]; 1576 s.Sfl = sflsave[i]; // undo block register assignments 1577 } 1578 1579 if (reflocal) 1580 bl.Bflags |= BFLreflocal; 1581 if (refparam) 1582 bl.Bflags |= BFLrefparam; 1583 refparam |= refparamsave; 1584 bl.Bregcon.immed = regcon.immed; 1585 bl.Bregcon.cse = regcon.cse; 1586 bl.Bregcon.used = regcon.used; 1587 bl.Bregcon.params = regcon.params; 1588 1589 debug 1590 debugw && printf("code gen complete\n"); 1591 } 1592 1593 /***************************************** 1594 * Add in exception handling code. 
1595 */ 1596 1597 version (SCPP) 1598 { 1599 1600 private void cgcod_eh() 1601 { 1602 list_t stack; 1603 int idx; 1604 int tryidx; 1605 1606 if (!(usednteh & (EHtry | EHcleanup))) 1607 return; 1608 1609 // Compute Bindex for each block 1610 for (block *b = startblock; b; b = b.Bnext) 1611 { 1612 b.Bindex = -1; 1613 b.Bflags &= ~BFLvisited; /* mark as unvisited */ 1614 } 1615 block *btry = null; 1616 int lastidx = 0; 1617 startblock.Bindex = 0; 1618 for (block *b = startblock; b; b = b.Bnext) 1619 { 1620 if (btry == b.Btry && b.BC == BCcatch) // if don't need to pop try block 1621 { 1622 block *br = list_block(b.Bpred); // find corresponding try block 1623 assert(br.BC == BCtry); 1624 b.Bindex = br.Bindex; 1625 } 1626 else if (btry != b.Btry && b.BC != BCcatch || 1627 !(b.Bflags & BFLvisited)) 1628 b.Bindex = lastidx; 1629 b.Bflags |= BFLvisited; 1630 1631 debug 1632 if (debuge) 1633 { 1634 WRBC(b.BC); 1635 printf(" block (%p) Btry=%p Bindex=%d\n",b,b.Btry,b.Bindex); 1636 } 1637 1638 except_index_set(b.Bindex); 1639 if (btry != b.Btry) // exited previous try block 1640 { 1641 except_pop(b,null,btry); 1642 btry = b.Btry; 1643 } 1644 if (b.BC == BCtry) 1645 { 1646 except_push(b,null,b); 1647 btry = b; 1648 tryidx = except_index_get(); 1649 CodeBuilder cdb; cdb.ctor(); 1650 nteh_gensindex(cdb,tryidx - 1); 1651 cdb.append(b.Bcode); 1652 b.Bcode = cdb.finish(); 1653 } 1654 1655 stack = null; 1656 for (code *c = b.Bcode; c; c = code_next(c)) 1657 { 1658 if ((c.Iop & ESCAPEmask) == ESCAPE) 1659 { 1660 code *c1 = null; 1661 switch (c.Iop & 0xFFFF00) 1662 { 1663 case ESCctor: 1664 //printf("ESCctor\n"); 1665 except_push(c,c.IEV1.Vtor,null); 1666 goto L1; 1667 1668 case ESCdtor: 1669 //printf("ESCdtor\n"); 1670 except_pop(c,c.IEV1.Vtor,null); 1671 L1: if (config.exe == EX_WIN32) 1672 { 1673 CodeBuilder cdb; cdb.ctor(); 1674 nteh_gensindex(cdb,except_index_get() - 1); 1675 c1 = cdb.finish(); 1676 c1.next = code_next(c); 1677 c.next = c1; 1678 } 1679 break; 1680 1681 case 
ESCmark: 1682 //printf("ESCmark\n"); 1683 idx = except_index_get(); 1684 list_prependdata(&stack,idx); 1685 except_mark(); 1686 break; 1687 1688 case ESCrelease: 1689 //printf("ESCrelease\n"); 1690 version (SCPP) 1691 { 1692 idx = list_data(stack); 1693 list_pop(&stack); 1694 if (idx != except_index_get()) 1695 { 1696 if (config.exe == EX_WIN32) 1697 { 1698 CodeBuilder cdb; cdb.ctor(); 1699 nteh_gensindex(cdb,idx - 1); 1700 c1 = cdb.finish(); 1701 c1.next = code_next(c); 1702 c.next = c1; 1703 } 1704 else 1705 { except_pair_append(c,idx - 1); 1706 c.Iop = ESCAPE | ESCoffset; 1707 } 1708 } 1709 except_release(); 1710 } 1711 break; 1712 1713 case ESCmark2: 1714 //printf("ESCmark2\n"); 1715 except_mark(); 1716 break; 1717 1718 case ESCrelease2: 1719 //printf("ESCrelease2\n"); 1720 version (SCPP) 1721 { 1722 except_release(); 1723 } 1724 break; 1725 1726 default: 1727 break; 1728 } 1729 } 1730 } 1731 assert(stack == null); 1732 b.Bendindex = except_index_get(); 1733 1734 if (b.BC != BCret && b.BC != BCretexp) 1735 lastidx = b.Bendindex; 1736 1737 // Set starting index for each of the successors 1738 int i = 0; 1739 foreach (bl; ListRange(b.Bsucc)) 1740 { 1741 block *bs = list_block(bl); 1742 if (b.BC == BCtry) 1743 { 1744 switch (i) 1745 { 1746 case 0: // block after catches 1747 bs.Bindex = b.Bendindex; 1748 break; 1749 1750 case 1: // 1st catch block 1751 bs.Bindex = tryidx; 1752 break; 1753 1754 default: // subsequent catch blocks 1755 bs.Bindex = b.Bindex; 1756 break; 1757 } 1758 1759 debug 1760 if (debuge) 1761 { 1762 printf(" 1setting %p to %d\n",bs,bs.Bindex); 1763 } 1764 } 1765 else if (!(bs.Bflags & BFLvisited)) 1766 { 1767 bs.Bindex = b.Bendindex; 1768 1769 debug 1770 if (debuge) 1771 { 1772 printf(" 2setting %p to %d\n",bs,bs.Bindex); 1773 } 1774 } 1775 bs.Bflags |= BFLvisited; 1776 i++; 1777 } 1778 } 1779 1780 if (config.exe == EX_WIN32) 1781 for (block *b = startblock; b; b = b.Bnext) 1782 { 1783 if (/*!b.Bcount ||*/ b.BC == BCtry) 1784 continue; 1785 
foreach (bl; ListRange(b.Bpred)) 1786 { 1787 int pi = list_block(bl).Bendindex; 1788 if (b.Bindex != pi) 1789 { 1790 CodeBuilder cdb; cdb.ctor(); 1791 nteh_gensindex(cdb,b.Bindex - 1); 1792 cdb.append(b.Bcode); 1793 b.Bcode = cdb.finish(); 1794 break; 1795 } 1796 } 1797 } 1798 } 1799 1800 } 1801 1802 /****************************** 1803 * Count the number of bits set in a register mask. 1804 */ 1805 1806 int numbitsset(regm_t regm) 1807 { 1808 int n = 0; 1809 if (regm) 1810 do 1811 n++; 1812 while ((regm &= regm - 1) != 0); 1813 return n; 1814 } 1815 1816 /****************************** 1817 * Given a register mask, find and return the number 1818 * of the first register that fits. 1819 */ 1820 1821 reg_t findreg(regm_t regm) 1822 { 1823 return findreg(regm, __LINE__, __FILE__); 1824 } 1825 1826 reg_t findreg(regm_t regm, int line, const(char)* file) 1827 { 1828 debug 1829 regm_t regmsave = regm; 1830 1831 reg_t i = 0; 1832 while (1) 1833 { 1834 if (!(regm & 0xF)) 1835 { 1836 regm >>= 4; 1837 i += 4; 1838 if (!regm) 1839 break; 1840 } 1841 if (regm & 1) 1842 return i; 1843 regm >>= 1; 1844 i++; 1845 } 1846 1847 debug 1848 printf("findreg(%s, line=%d, file='%s', function = '%s')\n",regm_str(regmsave),line,file,funcsym_p.Sident.ptr); 1849 fflush(stdout); 1850 1851 // *(char*)0=0; 1852 assert(0); 1853 } 1854 1855 /*************** 1856 * Free element (but not it's leaves! (assume they are already freed)) 1857 * Don't decrement Ecount! This is so we can detect if the common subexp 1858 * has already been evaluated. 1859 * If common subexpression is not required anymore, eliminate 1860 * references to it. 
1861 */ 1862 1863 void freenode(elem *e) 1864 { 1865 elem_debug(e); 1866 //dbg_printf("freenode(%p) : comsub = %d, count = %d\n",e,e.Ecomsub,e.Ecount); 1867 if (e.Ecomsub--) return; /* usage count */ 1868 if (e.Ecount) /* if it was a CSE */ 1869 { 1870 for (size_t i = 0; i < regcon.cse.value.length; i++) 1871 { 1872 if (regcon.cse.value[i] == e) /* if a register is holding it */ 1873 { 1874 regcon.cse.mval &= ~mask(cast(uint)i); 1875 regcon.cse.mops &= ~mask(cast(uint)i); /* free masks */ 1876 } 1877 } 1878 CSE.remove(e); 1879 } 1880 } 1881 1882 /********************************* 1883 * Reset Ecomsub for all elem nodes, i.e. reverse the effects of freenode(). 1884 */ 1885 1886 private void resetEcomsub(elem *e) 1887 { 1888 while (1) 1889 { 1890 elem_debug(e); 1891 e.Ecomsub = e.Ecount; 1892 const op = e.Eoper; 1893 if (!OTleaf(op)) 1894 { 1895 if (OTbinary(op)) 1896 resetEcomsub(e.EV.E2); 1897 e = e.EV.E1; 1898 } 1899 else 1900 break; 1901 } 1902 } 1903 1904 /********************************* 1905 * Determine if elem e is a register variable. 1906 * If so: 1907 * *pregm = mask of registers that make up the variable 1908 * *preg = the least significant register 1909 * returns true 1910 * Else 1911 * returns false 1912 */ 1913 1914 int isregvar(elem *e,regm_t *pregm,reg_t *preg) 1915 { 1916 Symbol *s; 1917 uint u; 1918 regm_t m; 1919 regm_t regm; 1920 reg_t reg; 1921 1922 elem_debug(e); 1923 if (e.Eoper == OPvar || e.Eoper == OPrelconst) 1924 { 1925 s = e.EV.Vsym; 1926 switch (s.Sfl) 1927 { 1928 case FLreg: 1929 if (s.Sclass == SCparameter) 1930 { refparam = true; 1931 reflocal = true; 1932 } 1933 reg = e.EV.Voffset == REGSIZE ? s.Sregmsw : s.Sreglsw; 1934 regm = s.Sregm; 1935 //assert(tyreg(s.ty())); 1936 static if (0) 1937 { 1938 // Let's just see if there is a CSE in a reg we can use 1939 // instead. This helps avoid AGI's. 
1940 if (e.Ecount && e.Ecount != e.Ecomsub) 1941 { int i; 1942 1943 for (i = 0; i < arraysize(regcon.cse.value); i++) 1944 { 1945 if (regcon.cse.value[i] == e) 1946 { reg = i; 1947 break; 1948 } 1949 } 1950 } 1951 } 1952 assert(regm & regcon.mvar && !(regm & ~regcon.mvar)); 1953 goto Lreg; 1954 1955 case FLpseudo: 1956 version (MARS) 1957 { 1958 u = s.Sreglsw; 1959 m = mask(u); 1960 if (m & ALLREGS && (u & ~3) != 4) // if not BP,SP,EBP,ESP,or ?H 1961 { 1962 reg = u & 7; 1963 regm = m; 1964 goto Lreg; 1965 } 1966 } 1967 else 1968 { 1969 u = s.Sreglsw; 1970 m = pseudomask[u]; 1971 if (m & ALLREGS && (u & ~3) != 4) // if not BP,SP,EBP,ESP,or ?H 1972 { 1973 reg = pseudoreg[u] & 7; 1974 regm = m; 1975 goto Lreg; 1976 } 1977 } 1978 break; 1979 1980 default: 1981 break; 1982 } 1983 } 1984 return false; 1985 1986 Lreg: 1987 if (preg) 1988 *preg = reg; 1989 if (pregm) 1990 *pregm = regm; 1991 return true; 1992 } 1993 1994 /********************************* 1995 * Allocate some registers. 1996 * Input: 1997 * pretregs Pointer to mask of registers to make selection from. 1998 * tym Mask of type we will store in registers. 1999 * Output: 2000 * *pretregs Mask of allocated registers. 2001 * *preg Register number of first allocated register. 2002 * msavereg,mfuncreg retregs bits are cleared. 2003 * regcon.cse.mval,regcon.cse.mops updated 2004 * Returns: 2005 * pointer to code generated if necessary to save any regcon.cse.mops on the 2006 * stack. 
 */

/* Convenience overload: forwards to the full version, passing this file's
 * own __LINE__ and __FILE__ as the diagnostic location.
 */
void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym)
{
    allocreg(cdb, pretregs, preg, tym, __LINE__, __FILE__);
}

/* Full version of allocreg().
 * line and file are used only in the diagnostic printf's below.
 */
void allocreg(ref CodeBuilder cdb,regm_t *pretregs,reg_t *preg,tym_t tym
        ,int line,const(char)* file)
{
        reg_t reg;

static if (0)
{
        if (pass == PASSfinal)
        {
            printf("allocreg %s,%d: regcon.mvar %s regcon.cse.mval %s msavereg %s *pretregs %s tym ",
                file,line,regm_str(regcon.mvar),regm_str(regcon.cse.mval),
                regm_str(msavereg),regm_str(*pretregs));
            WRTYxx(tym);
            dbg_printf("\n");
        }
}
        tym = tybasic(tym);
        uint size = _tysize[tym];
        // Restrict the request to registers this function may hand out
        *pretregs &= mES | allregs | XMMREGS;
        regm_t retregs = *pretregs;

        debug if (retregs == 0)
            printf("allocreg: file %s(%d)\n", file, line);

        // Fast path: the request consists solely of register variables
        if ((retregs & regcon.mvar) == retregs) // if exactly in reg vars
        {
            if (size <= REGSIZE || (retregs & XMMREGS))
            {
                *preg = findreg(retregs);
                assert(retregs == mask(*preg)); /* no more bits are set */
            }
            else if (size <= 2 * REGSIZE)
            {
                *preg = findregmsw(retregs);
                assert(retregs & mLSW);
            }
            else
                assert(0);
            getregs(cdb,retregs);
            return;
        }
        int count = 0;
L1:
        // Retry point: reached again after narrowing the candidate set
        //printf("L1: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        // NOTE: the increment lives inside the assert expression
        assert(++count < 20);           /* fail instead of hanging if blocked */
        assert(retregs);
        reg_t msreg = NOREG, lsreg = NOREG;  /* no value assigned yet        */
L3:
        //printf("L2: allregs = %s, *pretregs = %s\n", regm_str(allregs), regm_str(*pretregs));
        /* Pick candidates r, progressively relaxing what is excluded:
         * first avoid saved regs, CSE values and parameter registers, then
         * allow parameter registers, then exclude only regcon.cse.mops
         * instead of regcon.cse.mval, then exclude only msavereg, and
         * finally take anything requested.
         */
        regm_t r = retregs & ~(msavereg | regcon.cse.mval | regcon.params);
        if (!r)
        {
            r = retregs & ~(msavereg | regcon.cse.mval);
            if (!r)
            {
                r = retregs & ~(msavereg | regcon.cse.mops);
                if (!r)
                {   r = retregs & ~msavereg;
                    if (!r)
                        r = retregs;
                }
            }
        }

        if (size <= REGSIZE || retregs & XMMREGS)
        {
            // Single register: avoid BP when there is any alternative
            if (r & ~mBP)
                r &= ~mBP;

            // If only one index register, prefer to not use LSW registers
            if (!regcon.indexregs && r & ~mLSW)
                r &= ~mLSW;

            if (pass == PASSfinal && r & ~lastretregs && !I16)
            {   // Try not to always allocate the same register,
                // to schedule better

                r &= ~lastretregs;
                if (r & ~last2retregs)
                {
                    r &= ~last2retregs;
                    if (r & ~last3retregs)
                    {
                        r &= ~last3retregs;
                        if (r & ~last4retregs)
                        {
                            r &= ~last4retregs;
//                          if (r & ~last5retregs)
//                              r &= ~last5retregs;
                        }
                    }
                }
                if (r & ~mfuncreg)
                    r &= ~mfuncreg;
            }
            reg = findreg(r);
            retregs = mask(reg);
        }
        else if (size <= 2 * REGSIZE)
        {
            /* Select pair with both regs free. Failing */
            /* that, select pair with one reg free.             */

            if (r & mBP)
            {
                // BP is never part of a pair; drop it and reselect
                retregs &= ~mBP;
                goto L3;
            }

            if (r & mMSW)
            {
                if (r & mDX)
                    msreg = DX;                 /* prefer to use DX over CX */
                else
                    msreg = findregmsw(r);
                r &= mLSW;                      /* see if there's an LSW also */
                if (r)
                    lsreg = findreg(r);
                else if (lsreg == NOREG)   /* if don't have LSW yet */
                {
                    // Keep msreg and go back to look for just the LSW half
                    retregs &= mLSW;
                    goto L3;
                }
            }
            else
            {
                if (I64 && !(r & mLSW))
                {
                    retregs = *pretregs & (mMSW | mLSW);
                    assert(retregs);
                    goto L1;
                }
                lsreg = findreglsw(r);
                if (msreg == NOREG)
                {
                    // Keep lsreg and go back to look for just the MSW half
                    retregs &= mMSW;
                    assert(retregs);
                    goto L3;
                }
            }
            reg = (msreg == ES) ? lsreg : msreg;
            retregs = mask(msreg) | mask(lsreg);
        }
        else if (I16 && (tym == TYdouble || tym == TYdouble_alias))
        {
            debug
            if (retregs != DOUBLEREGS)
                printf("retregs = %s, *pretregs = %s\n", regm_str(retregs), regm_str(*pretregs));

            assert(retregs == DOUBLEREGS);
            reg = AX;
        }
        else
        {
            // Unsupported size/type combination
            debug
            {
                WRTYxx(tym);
                printf("\nallocreg: fil %s lin %d, regcon.mvar %s msavereg %s *pretregs %s, reg %d, tym x%x\n",
                    file,line,regm_str(regcon.mvar),regm_str(msavereg),regm_str(*pretregs),*preg,tym);
            }
            assert(0);
        }
        if (retregs & regcon.mvar)              // if conflict with reg vars
        {
            // The conflict is tolerated only for a multi-register value requested in exactly AX|DX
            if (!(size > REGSIZE && *pretregs == (mAX | mDX)))
            {
                retregs = (*pretregs &= ~(retregs & regcon.mvar));
                goto L1;                // try other registers
            }
        }
        *preg = reg;
        *pretregs = retregs;

        //printf("Allocating %s\n",regm_str(retregs));
        // Shift the allocation history used by the PASSfinal heuristic above
        last5retregs = last4retregs;
        last4retregs = last3retregs;
        last3retregs = last2retregs;
        last2retregs = lastretregs;
        lastretregs = retregs;
        getregs(cdb, retregs);
}

/******************************
 * Determine registers that should be destroyed upon arrival
 * to code entry point for exception handling.
 * Returns:
 *      with DWARF exception handling, the registers in allregs that are
 *      no longer in mfuncreg; otherwise allregs for 32/64 bit code and
 *      ALLREGS | mES for 16 bit code.
 */
regm_t lpadregs()
{
    regm_t used;
    if (config.ehmethod == EHmethod.EH_DWARF)
        used = allregs & ~mfuncreg;
    else
        used = (I32 | I64) ? allregs : (ALLREGS | mES);
    //printf("lpadregs(): used=%s, allregs=%s, mfuncreg=%s\n", regm_str(used), regm_str(allregs), regm_str(mfuncreg));
    return used;
}


/*************************
 * Mark registers as used.
2214 */ 2215 2216 void useregs(regm_t regm) 2217 { 2218 //printf("useregs(x%x) %s\n", regm, regm_str(regm)); 2219 mfuncreg &= ~regm; 2220 regcon.used |= regm; // registers used in this block 2221 regcon.params &= ~regm; 2222 if (regm & regcon.mpvar) // if modified a fastpar register variable 2223 regcon.params = 0; // toss them all out 2224 } 2225 2226 /************************* 2227 * We are going to use the registers in mask r. 2228 * Generate any code necessary to save any regs. 2229 */ 2230 2231 void getregs(ref CodeBuilder cdb, regm_t r) 2232 { 2233 //printf("getregs(x%x) %s\n", r, regm_str(r)); 2234 regm_t ms = r & regcon.cse.mops; // mask of common subs we must save 2235 useregs(r); 2236 regcon.cse.mval &= ~r; 2237 msavereg &= ~r; // regs that are destroyed 2238 regcon.immed.mval &= ~r; 2239 if (ms) 2240 cse_save(cdb, ms); 2241 } 2242 2243 /************************* 2244 * We are going to use the registers in mask r. 2245 * Same as getregs(), but assert if code is needed to be generated. 2246 */ 2247 void getregsNoSave(regm_t r) 2248 { 2249 //printf("getregsNoSave(x%x) %s\n", r, regm_str(r)); 2250 assert(!(r & regcon.cse.mops)); // mask of common subs we must save 2251 useregs(r); 2252 regcon.cse.mval &= ~r; 2253 msavereg &= ~r; // regs that are destroyed 2254 regcon.immed.mval &= ~r; 2255 } 2256 2257 /***************************************** 2258 * Copy registers in cse.mops into memory. 
2259 */ 2260 2261 private void cse_save(ref CodeBuilder cdb, regm_t ms) 2262 { 2263 assert((ms & regcon.cse.mops) == ms); 2264 regcon.cse.mops &= ~ms; 2265 2266 /* Skip CSEs that are already saved */ 2267 for (regm_t regm = 1; regm < mask(NUMREGS); regm <<= 1) 2268 { 2269 if (regm & ms) 2270 { 2271 const e = regcon.cse.value[findreg(regm)]; 2272 const sz = tysize(e.Ety); 2273 foreach (const ref cse; CSE.filter(e)) 2274 { 2275 if (sz <= REGSIZE || 2276 sz <= 2 * REGSIZE && 2277 (regm & mMSW && cse.regm & mMSW || 2278 regm & mLSW && cse.regm & mLSW) || 2279 sz == 4 * REGSIZE && regm == cse.regm 2280 ) 2281 { 2282 ms &= ~regm; 2283 if (!ms) 2284 return; 2285 break; 2286 } 2287 } 2288 } 2289 } 2290 2291 while (ms) 2292 { 2293 auto cse = CSE.add(); 2294 reg_t reg = findreg(ms); /* the register to save */ 2295 cse.e = regcon.cse.value[reg]; 2296 cse.regm = mask(reg); 2297 2298 ms &= ~mask(reg); /* turn off reg bit in ms */ 2299 2300 // If we can simply reload the CSE, we don't need to save it 2301 if (cse_simple(&cse.csimple, cse.e)) 2302 cse.flags |= CSEsimple; 2303 else 2304 { 2305 CSE.updateSizeAndAlign(cse.e); 2306 gen_storecse(cdb, cse.e.Ety, reg, cse.slot); 2307 reflocal = true; 2308 } 2309 } 2310 } 2311 2312 /****************************************** 2313 * Getregs without marking immediate register values as gone. 2314 */ 2315 2316 void getregs_imm(ref CodeBuilder cdb, regm_t r) 2317 { 2318 regm_t save = regcon.immed.mval; 2319 getregs(cdb,r); 2320 regcon.immed.mval = save; 2321 } 2322 2323 /****************************************** 2324 * Flush all CSE's out of registers and into memory. 
2325 * Input: 2326 * do87 !=0 means save 87 registers too 2327 */ 2328 2329 void cse_flush(ref CodeBuilder cdb, int do87) 2330 { 2331 //dbg_printf("cse_flush()\n"); 2332 cse_save(cdb,regcon.cse.mops); // save any CSEs to memory 2333 if (do87) 2334 save87(cdb); // save any 8087 temporaries 2335 } 2336 2337 /************************* 2338 * Common subexpressions exist in registers. Note this in regcon.cse.mval. 2339 * Input: 2340 * e the subexpression 2341 * regm mask of registers holding it 2342 * opsflag if != 0 then regcon.cse.mops gets set too 2343 * Returns: 2344 * false not saved as a CSE 2345 * true saved as a CSE 2346 */ 2347 2348 bool cssave(elem *e,regm_t regm,uint opsflag) 2349 { 2350 bool result = false; 2351 2352 /*if (e.Ecount && e.Ecount == e.Ecomsub)*/ 2353 if (e.Ecount && e.Ecomsub) 2354 { 2355 if (!opsflag && pass != PASSfinal && (I32 || I64)) 2356 return false; 2357 2358 //printf("cssave(e = %p, regm = %s, opsflag = x%x)\n", e, regm_str(regm), opsflag); 2359 regm &= mBP | ALLREGS | mES | XMMREGS; /* just to be sure */ 2360 2361 /+ 2362 /* Do not register CSEs if they are register variables and */ 2363 /* are not operator nodes. This forces the register allocation */ 2364 /* to go through allocreg(), which will prevent using register */ 2365 /* variables for scratch. 
*/ 2366 if (opsflag || !(regm & regcon.mvar)) 2367 +/ 2368 for (uint i = 0; regm; i++) 2369 { 2370 regm_t mi = mask(i); 2371 if (regm & mi) 2372 { 2373 regm &= ~mi; 2374 2375 // If we don't need this CSE, and the register already 2376 // holds a CSE that we do need, don't mark the new one 2377 if (regcon.cse.mval & mi && regcon.cse.value[i] != e && 2378 !opsflag && regcon.cse.mops & mi) 2379 continue; 2380 2381 regcon.cse.mval |= mi; 2382 if (opsflag) 2383 regcon.cse.mops |= mi; 2384 //printf("cssave set: regcon.cse.value[%s] = %p\n",regstring[i],e); 2385 regcon.cse.value[i] = e; 2386 result = true; 2387 } 2388 } 2389 } 2390 return result; 2391 } 2392 2393 /************************************* 2394 * Determine if a computation should be done into a register. 2395 */ 2396 2397 bool evalinregister(elem *e) 2398 { 2399 if (config.exe == EX_WIN64 && e.Eoper == OPrelconst) 2400 return true; 2401 2402 if (e.Ecount == 0) /* elem is not a CSE, therefore */ 2403 /* we don't need to evaluate it */ 2404 /* in a register */ 2405 return false; 2406 if (!OTleaf(e.Eoper)) /* operators are always in register */ 2407 return true; 2408 2409 // Need to rethink this code if float or double can be CSE'd 2410 uint sz = tysize(e.Ety); 2411 if (e.Ecount == e.Ecomsub) /* elem is a CSE that needs */ 2412 /* to be generated */ 2413 { 2414 if ((I32 || I64) && 2415 //pass == PASSfinal && // bug 8987 2416 sz <= REGSIZE) 2417 { 2418 // Do it only if at least 2 registers are available 2419 regm_t m = allregs & ~regcon.mvar; 2420 if (sz == 1) 2421 m &= BYTEREGS; 2422 if (m & (m - 1)) // if more than one register 2423 { // Need to be at least 3 registers available, as 2424 // addressing modes can use up 2. 2425 while (!(m & 1)) 2426 m >>= 1; 2427 m >>= 1; 2428 if (m & (m - 1)) 2429 return true; 2430 } 2431 } 2432 return false; 2433 } 2434 2435 /* Elem is now a CSE that might have been generated. 
If so, and */ 2436 /* it's in a register already, the computation should be done */ 2437 /* using that register. */ 2438 regm_t emask = 0; 2439 for (uint i = 0; i < regcon.cse.value.length; i++) 2440 if (regcon.cse.value[i] == e) 2441 emask |= mask(i); 2442 emask &= regcon.cse.mval; // mask of available CSEs 2443 if (sz <= REGSIZE) 2444 return emask != 0; /* the CSE is in a register */ 2445 else if (sz <= 2 * REGSIZE) 2446 return (emask & mMSW) && (emask & mLSW); 2447 return true; /* cop-out for now */ 2448 } 2449 2450 /******************************************************* 2451 * Return mask of scratch registers. 2452 */ 2453 2454 regm_t getscratch() 2455 { 2456 regm_t scratch = 0; 2457 if (pass == PASSfinal) 2458 { 2459 scratch = allregs & ~(regcon.mvar | regcon.mpvar | regcon.cse.mval | 2460 regcon.immed.mval | regcon.params | mfuncreg); 2461 } 2462 return scratch; 2463 } 2464 2465 /****************************** 2466 * Evaluate an elem that is a common subexp that has been encountered 2467 * before. 2468 * Look first to see if it is already in a register. 2469 */ 2470 2471 private void comsub(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2472 { 2473 tym_t tym; 2474 regm_t regm,emask; 2475 reg_t reg; 2476 uint byte_,sz; 2477 2478 //printf("comsub(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); 2479 elem_debug(e); 2480 2481 debug 2482 { 2483 if (e.Ecomsub > e.Ecount) 2484 elem_print(e); 2485 } 2486 2487 assert(e.Ecomsub <= e.Ecount); 2488 2489 if (*pretregs == 0) // no possible side effects anyway 2490 { 2491 return; 2492 } 2493 2494 /* First construct a mask, emask, of all the registers that 2495 * have the right contents. 
2496 */ 2497 emask = 0; 2498 for (uint i = 0; i < regcon.cse.value.length; i++) 2499 { 2500 //dbg_printf("regcon.cse.value[%d] = %p\n",i,regcon.cse.value[i]); 2501 if (regcon.cse.value[i] == e) // if contents are right 2502 emask |= mask(i); // turn on bit for reg 2503 } 2504 emask &= regcon.cse.mval; // make sure all bits are valid 2505 2506 if (emask & XMMREGS && *pretregs == mPSW) 2507 { } 2508 else if (tyxmmreg(e.Ety) && config.fpxmmregs) 2509 { 2510 if (*pretregs & (mST0 | mST01)) 2511 { 2512 regm_t retregs = *pretregs & mST0 ? XMMREGS : mXMM0 | mXMM1; 2513 comsub(cdb, e, &retregs); 2514 fixresult(cdb,e,retregs,pretregs); 2515 return; 2516 } 2517 } 2518 else if (tyfloating(e.Ety) && config.inline8087) 2519 { 2520 comsub87(cdb,e,pretregs); 2521 return; 2522 } 2523 2524 2525 /* create mask of CSEs */ 2526 regm_t csemask = CSE.mask(e); 2527 csemask &= ~emask; // stuff already in registers 2528 2529 debug if (debugw) 2530 { 2531 printf("comsub(e=%p): *pretregs=%s, emask=%s, csemask=%s, regcon.cse.mval=%s, regcon.mvar=%s\n", 2532 e,regm_str(*pretregs),regm_str(emask),regm_str(csemask), 2533 regm_str(regcon.cse.mval),regm_str(regcon.mvar)); 2534 if (regcon.cse.mval & 1) 2535 elem_print(regcon.cse.value[0]); 2536 } 2537 2538 tym = tybasic(e.Ety); 2539 sz = _tysize[tym]; 2540 byte_ = sz == 1; 2541 2542 if (sz <= REGSIZE || (tyxmmreg(tym) && config.fpxmmregs)) // if data will fit in one register 2543 { 2544 /* First see if it is already in a correct register */ 2545 2546 regm = emask & *pretregs; 2547 if (regm == 0) 2548 regm = emask; /* try any other register */ 2549 if (regm) /* if it's in a register */ 2550 { 2551 if (!OTleaf(e.Eoper) || !(regm & regcon.mvar) || (*pretregs & regcon.mvar) == *pretregs) 2552 { 2553 regm = mask(findreg(regm)); 2554 fixresult(cdb,e,regm,pretregs); 2555 return; 2556 } 2557 } 2558 2559 if (OTleaf(e.Eoper)) /* if not op or func */ 2560 goto reload; /* reload data */ 2561 2562 foreach (ref cse; CSE.filter(e)) 2563 { 2564 regm_t retregs; 
2565 2566 if (cse.flags & CSEsimple) 2567 { 2568 retregs = *pretregs; 2569 if (byte_ && !(retregs & BYTEREGS)) 2570 retregs = BYTEREGS; 2571 else if (!(retregs & allregs)) 2572 retregs = allregs; 2573 allocreg(cdb,&retregs,®,tym); 2574 code *cr = &cse.csimple; 2575 cr.setReg(reg); 2576 if (I64 && reg >= 4 && tysize(cse.e.Ety) == 1) 2577 cr.Irex |= REX; 2578 cdb.gen(cr); 2579 goto L10; 2580 } 2581 else 2582 { 2583 reflocal = true; 2584 cse.flags |= CSEload; 2585 if (*pretregs == mPSW) // if result in CCs only 2586 { 2587 if (config.fpxmmregs && (tyxmmreg(cse.e.Ety) || tyvector(cse.e.Ety))) 2588 { 2589 retregs = XMMREGS; 2590 allocreg(cdb,&retregs,®,tym); 2591 gen_loadcse(cdb, cse.e.Ety, reg, cse.slot); 2592 regcon.cse.mval |= mask(reg); // cs is in a reg 2593 regcon.cse.value[reg] = e; 2594 fixresult(cdb,e,retregs,pretregs); 2595 } 2596 else 2597 { 2598 // CMP cs[BP],0 2599 gen_testcse(cdb, cse.e.Ety, sz, cse.slot); 2600 } 2601 } 2602 else 2603 { 2604 retregs = *pretregs; 2605 if (byte_ && !(retregs & BYTEREGS)) 2606 retregs = BYTEREGS; 2607 allocreg(cdb,&retregs,®,tym); 2608 gen_loadcse(cdb, cse.e.Ety, reg, cse.slot); 2609 L10: 2610 regcon.cse.mval |= mask(reg); // cs is in a reg 2611 regcon.cse.value[reg] = e; 2612 fixresult(cdb,e,retregs,pretregs); 2613 } 2614 } 2615 return; 2616 } 2617 2618 debug 2619 { 2620 printf("couldn't find cse e = %p, pass = %d\n",e,pass); 2621 elem_print(e); 2622 } 2623 assert(0); /* should have found it */ 2624 } 2625 else /* reg pair is req'd */ 2626 if (sz <= 2 * REGSIZE) 2627 { 2628 reg_t msreg,lsreg; 2629 2630 /* see if we have both */ 2631 if (!((emask | csemask) & mMSW && (emask | csemask) & (mLSW | mBP))) 2632 { /* we don't have both */ 2633 debug if (!OTleaf(e.Eoper)) 2634 { 2635 printf("e = %p, op = x%x, emask = %s, csemask = %s\n", 2636 e,e.Eoper,regm_str(emask),regm_str(csemask)); 2637 //printf("mMSW = x%x, mLSW = x%x\n", mMSW, mLSW); 2638 elem_print(e); 2639 } 2640 2641 assert(OTleaf(e.Eoper)); /* must have both for 
operators */ 2642 goto reload; 2643 } 2644 2645 /* Look for right vals in any regs */ 2646 regm = *pretregs & mMSW; 2647 if (emask & regm) 2648 msreg = findreg(emask & regm); 2649 else if (emask & mMSW) 2650 msreg = findregmsw(emask); 2651 else /* reload from cse array */ 2652 { 2653 if (!regm) 2654 regm = mMSW & ALLREGS; 2655 allocreg(cdb,®m,&msreg,TYint); 2656 loadcse(cdb,e,msreg,mMSW); 2657 } 2658 2659 regm = *pretregs & (mLSW | mBP); 2660 if (emask & regm) 2661 lsreg = findreg(emask & regm); 2662 else if (emask & (mLSW | mBP)) 2663 lsreg = findreglsw(emask); 2664 else 2665 { 2666 if (!regm) 2667 regm = mLSW; 2668 allocreg(cdb,®m,&lsreg,TYint); 2669 loadcse(cdb,e,lsreg,mLSW | mBP); 2670 } 2671 2672 regm = mask(msreg) | mask(lsreg); /* mask of result */ 2673 fixresult(cdb,e,regm,pretregs); 2674 return; 2675 } 2676 else if (tym == TYdouble || tym == TYdouble_alias) // double 2677 { 2678 assert(I16); 2679 if (((csemask | emask) & DOUBLEREGS_16) == DOUBLEREGS_16) 2680 { 2681 static const reg_t[4] dblreg = [ BX,DX,NOREG,CX ]; // duplicate of one in cod4.d 2682 for (reg = 0; reg != NOREG; reg = dblreg[reg]) 2683 { 2684 assert(cast(int) reg >= 0 && reg <= 7); 2685 if (mask(reg) & csemask) 2686 loadcse(cdb,e,reg,mask(reg)); 2687 } 2688 regm = DOUBLEREGS_16; 2689 fixresult(cdb,e,regm,pretregs); 2690 return; 2691 } 2692 if (OTleaf(e.Eoper)) goto reload; 2693 2694 debug 2695 printf("e = %p, csemask = %s, emask = %s\n",e,regm_str(csemask),regm_str(emask)); 2696 2697 assert(0); 2698 } 2699 else 2700 { 2701 debug 2702 printf("e = %p, tym = x%x\n",e,tym); 2703 2704 assert(0); 2705 } 2706 2707 reload: /* reload result from memory */ 2708 switch (e.Eoper) 2709 { 2710 case OPrelconst: 2711 cdrelconst(cdb,e,pretregs); 2712 break; 2713 2714 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 2715 { 2716 case OPgot: 2717 cdgot(cdb,e,pretregs); 2718 break; 2719 } 2720 default: 2721 if (*pretregs == mPSW && 2722 
config.fpxmmregs && 2723 (tyxmmreg(tym) || tysimd(tym))) 2724 { 2725 regm_t retregs = XMMREGS | mPSW; 2726 loaddata(cdb,e,&retregs); 2727 cssave(e,retregs,false); 2728 return; 2729 } 2730 loaddata(cdb,e,pretregs); 2731 break; 2732 } 2733 cssave(e,*pretregs,false); 2734 } 2735 2736 2737 /***************************** 2738 * Load reg from cse save area on stack. 2739 */ 2740 2741 private void loadcse(ref CodeBuilder cdb,elem *e,reg_t reg,regm_t regm) 2742 { 2743 foreach (ref cse; CSE.filter(e)) 2744 { 2745 //printf("CSE[%d] = %p, regm = %s\n", i, cse.e, regm_str(cse.regm)); 2746 if (cse.regm & regm) 2747 { 2748 reflocal = true; 2749 cse.flags |= CSEload; /* it was loaded */ 2750 regcon.cse.value[reg] = e; 2751 regcon.cse.mval |= mask(reg); 2752 getregs(cdb,mask(reg)); 2753 gen_loadcse(cdb, cse.e.Ety, reg, cse.slot); 2754 return; 2755 } 2756 } 2757 debug 2758 { 2759 printf("loadcse(e = %p, reg = %d, regm = %s)\n",e,reg,regm_str(regm)); 2760 elem_print(e); 2761 } 2762 assert(0); 2763 } 2764 2765 /*************************** 2766 * Generate code sequence for an elem. 2767 * Input: 2768 * pretregs = mask of possible registers to return result in 2769 * Note: longs are in AX,BX or CX,DX or SI,DI 2770 * doubles are AX,BX,CX,DX only 2771 * constflag = 1 for user of result will not modify the 2772 * registers returned in *pretregs. 2773 * 2 for freenode() not called. 
2774 * Output: 2775 * *pretregs mask of registers result is returned in 2776 * Returns: 2777 * pointer to code sequence generated 2778 */ 2779 2780 void callcdxxx(ref CodeBuilder cdb, elem *e, regm_t *pretregs, OPER op) 2781 { 2782 (*cdxxx[op])(cdb,e,pretregs); 2783 } 2784 2785 // jump table 2786 private extern (C++) __gshared nothrow void function (ref CodeBuilder,elem *,regm_t *)[OPMAX] cdxxx = 2787 [ 2788 OPunde: &cderr, 2789 OPadd: &cdorth, 2790 OPmul: &cdmul, 2791 OPand: &cdorth, 2792 OPmin: &cdorth, 2793 OPnot: &cdnot, 2794 OPcom: &cdcom, 2795 OPcond: &cdcond, 2796 OPcomma: &cdcomma, 2797 OPremquo: &cdmul, 2798 OPdiv: &cdmul, 2799 OPmod: &cdmul, 2800 OPxor: &cdorth, 2801 OPstring: &cderr, 2802 OPrelconst: &cdrelconst, 2803 OPinp: &cdport, 2804 OPoutp: &cdport, 2805 OPasm: &cdasm, 2806 OPinfo: &cdinfo, 2807 OPdctor: &cddctor, 2808 OPddtor: &cdddtor, 2809 OPctor: &cdctor, 2810 OPdtor: &cddtor, 2811 OPmark: &cdmark, 2812 OPvoid: &cdvoid, 2813 OPhalt: &cdhalt, 2814 OPnullptr: &cderr, 2815 OPpair: &cdpair, 2816 OPrpair: &cdpair, 2817 2818 OPor: &cdorth, 2819 OPoror: &cdloglog, 2820 OPandand: &cdloglog, 2821 OProl: &cdshift, 2822 OPror: &cdshift, 2823 OPshl: &cdshift, 2824 OPshr: &cdshift, 2825 OPashr: &cdshift, 2826 OPbit: &cderr, 2827 OPind: &cdind, 2828 OPaddr: &cderr, 2829 OPneg: &cdneg, 2830 OPuadd: &cderr, 2831 OPabs: &cdabs, 2832 OPsqrt: &cdneg, 2833 OPsin: &cdneg, 2834 OPcos: &cdneg, 2835 OPscale: &cdscale, 2836 OPyl2x: &cdscale, 2837 OPyl2xp1: &cdscale, 2838 OPcmpxchg: &cdcmpxchg, 2839 OPrint: &cdneg, 2840 OPrndtol: &cdrndtol, 2841 OPstrlen: &cdstrlen, 2842 OPstrcpy: &cdstrcpy, 2843 OPmemcpy: &cdmemcpy, 2844 OPmemset: &cdmemset, 2845 OPstrcat: &cderr, 2846 OPstrcmp: &cdstrcmp, 2847 OPmemcmp: &cdmemcmp, 2848 OPsetjmp: &cdsetjmp, 2849 OPnegass: &cdaddass, 2850 OPpreinc: &cderr, 2851 OPpredec: &cderr, 2852 OPstreq: &cdstreq, 2853 OPpostinc: &cdpost, 2854 OPpostdec: &cdpost, 2855 OPeq: &cdeq, 2856 OPaddass: &cdaddass, 2857 OPminass: &cdaddass, 2858 OPmulass: 
&cdmulass, 2859 OPdivass: &cdmulass, 2860 OPmodass: &cdmulass, 2861 OPshrass: &cdshass, 2862 OPashrass: &cdshass, 2863 OPshlass: &cdshass, 2864 OPandass: &cdaddass, 2865 OPxorass: &cdaddass, 2866 OPorass: &cdaddass, 2867 2868 OPle: &cdcmp, 2869 OPgt: &cdcmp, 2870 OPlt: &cdcmp, 2871 OPge: &cdcmp, 2872 OPeqeq: &cdcmp, 2873 OPne: &cdcmp, 2874 2875 OPunord: &cdcmp, 2876 OPlg: &cdcmp, 2877 OPleg: &cdcmp, 2878 OPule: &cdcmp, 2879 OPul: &cdcmp, 2880 OPuge: &cdcmp, 2881 OPug: &cdcmp, 2882 OPue: &cdcmp, 2883 OPngt: &cdcmp, 2884 OPnge: &cdcmp, 2885 OPnlt: &cdcmp, 2886 OPnle: &cdcmp, 2887 OPord: &cdcmp, 2888 OPnlg: &cdcmp, 2889 OPnleg: &cdcmp, 2890 OPnule: &cdcmp, 2891 OPnul: &cdcmp, 2892 OPnuge: &cdcmp, 2893 OPnug: &cdcmp, 2894 OPnue: &cdcmp, 2895 2896 OPvp_fp: &cdcnvt, 2897 OPcvp_fp: &cdcnvt, 2898 OPoffset: &cdlngsht, 2899 OPnp_fp: &cdshtlng, 2900 OPnp_f16p: &cdfar16, 2901 OPf16p_np: &cdfar16, 2902 2903 OPs16_32: &cdshtlng, 2904 OPu16_32: &cdshtlng, 2905 OPd_s32: &cdcnvt, 2906 OPb_8: &cdcnvt, 2907 OPs32_d: &cdcnvt, 2908 OPd_s16: &cdcnvt, 2909 OPs16_d: &cdcnvt, 2910 OPd_u16: &cdcnvt, 2911 OPu16_d: &cdcnvt, 2912 OPd_u32: &cdcnvt, 2913 OPu32_d: &cdcnvt, 2914 OP32_16: &cdlngsht, 2915 OPd_f: &cdcnvt, 2916 OPf_d: &cdcnvt, 2917 OPd_ld: &cdcnvt, 2918 OPld_d: &cdcnvt, 2919 OPc_r: &cdconvt87, 2920 OPc_i: &cdconvt87, 2921 OPu8_16: &cdbyteint, 2922 OPs8_16: &cdbyteint, 2923 OP16_8: &cdlngsht, 2924 OPu32_64: &cdshtlng, 2925 OPs32_64: &cdshtlng, 2926 OP64_32: &cdlngsht, 2927 OPu64_128: &cdshtlng, 2928 OPs64_128: &cdshtlng, 2929 OP128_64: &cdlngsht, 2930 OPmsw: &cdmsw, 2931 2932 OPd_s64: &cdcnvt, 2933 OPs64_d: &cdcnvt, 2934 OPd_u64: &cdcnvt, 2935 OPu64_d: &cdcnvt, 2936 OPld_u64: &cdcnvt, 2937 OPparam: &cderr, 2938 OPsizeof: &cderr, 2939 OParrow: &cderr, 2940 OParrowstar: &cderr, 2941 OPcolon: &cderr, 2942 OPcolon2: &cderr, 2943 OPbool: &cdnot, 2944 OPcall: &cdfunc, 2945 OPucall: &cdfunc, 2946 OPcallns: &cdfunc, 2947 OPucallns: &cdfunc, 2948 OPstrpar: &cderr, 2949 OPstrctor: &cderr, 2950 
OPstrthis: &cdstrthis, 2951 OPconst: &cderr, 2952 OPvar: &cderr, 2953 OPnew: &cderr, 2954 OPanew: &cderr, 2955 OPdelete: &cderr, 2956 OPadelete: &cderr, 2957 OPbrack: &cderr, 2958 OPframeptr: &cdframeptr, 2959 OPgot: &cdgot, 2960 2961 OPbsf: &cdbscan, 2962 OPbsr: &cdbscan, 2963 OPbtst: &cdbtst, 2964 OPbt: &cdbt, 2965 OPbtc: &cdbt, 2966 OPbtr: &cdbt, 2967 OPbts: &cdbt, 2968 2969 OPbswap: &cdbswap, 2970 OPpopcnt: &cdpopcnt, 2971 OPvector: &cdvector, 2972 OPvecsto: &cdvecsto, 2973 OPvecfill: &cdvecfill, 2974 OPva_start: &cderr, 2975 OPprefetch: &cdprefetch, 2976 ]; 2977 2978 2979 void codelem(ref CodeBuilder cdb,elem *e,regm_t *pretregs,uint constflag) 2980 { 2981 Symbol *s; 2982 2983 debug if (debugw) 2984 { 2985 printf("+codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs)); 2986 WROP(e.Eoper); 2987 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 2988 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 2989 printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub); 2990 } 2991 2992 assert(e); 2993 elem_debug(e); 2994 if ((regcon.cse.mops & regcon.cse.mval) != regcon.cse.mops) 2995 { 2996 debug 2997 { 2998 printf("+codelem(e=%p,*pretregs=%s) ", e, regm_str(*pretregs)); 2999 elem_print(e); 3000 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 3001 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 3002 printf("Ecount = %d, Ecomsub = %d\n", e.Ecount, e.Ecomsub); 3003 } 3004 assert(0); 3005 } 3006 3007 if (!(constflag & 1) && *pretregs & (mES | ALLREGS | mBP | XMMREGS) & ~regcon.mvar) 3008 *pretregs &= ~regcon.mvar; /* can't use register vars */ 3009 3010 uint op = e.Eoper; 3011 if (e.Ecount && e.Ecount != e.Ecomsub) // if common subexp 3012 { 3013 comsub(cdb,e,pretregs); 3014 goto L1; 3015 } 3016 3017 if (configv.addlinenumbers && e.Esrcpos.Slinnum) 3018 cdb.genlinnum(e.Esrcpos); 3019 3020 switch (op) 3021 { 3022 default: 3023 if (e.Ecount) /* if common subexp */ 3024 { 3025 /* if no return value */ 
3026 if ((*pretregs & (mSTACK | mES | ALLREGS | mBP | XMMREGS)) == 0) 3027 { 3028 if (*pretregs & (mST0 | mST01)) 3029 { 3030 //printf("generate ST0 comsub for:\n"); 3031 //elem_print(e); 3032 3033 regm_t retregs = *pretregs & mST0 ? mXMM0 : mXMM0|mXMM1; 3034 (*cdxxx[op])(cdb,e,&retregs); 3035 cssave(e,retregs,!OTleaf(op)); 3036 fixresult(cdb, e, retregs, pretregs); 3037 goto L1; 3038 } 3039 if (tysize(e.Ety) == 1) 3040 *pretregs |= BYTEREGS; 3041 else if ((tyxmmreg(e.Ety) || tysimd(e.Ety)) && config.fpxmmregs) 3042 *pretregs |= XMMREGS; 3043 else if (tybasic(e.Ety) == TYdouble || tybasic(e.Ety) == TYdouble_alias) 3044 *pretregs |= DOUBLEREGS; 3045 else 3046 *pretregs |= ALLREGS; /* make one */ 3047 } 3048 3049 /* BUG: For CSEs, make sure we have both an MSW */ 3050 /* and an LSW specified in *pretregs */ 3051 } 3052 assert(op <= OPMAX); 3053 (*cdxxx[op])(cdb,e,pretregs); 3054 break; 3055 3056 case OPrelconst: 3057 cdrelconst(cdb,e,pretregs); 3058 break; 3059 3060 case OPvar: 3061 if (constflag & 1 && (s = e.EV.Vsym).Sfl == FLreg && 3062 (s.Sregm & *pretregs) == s.Sregm) 3063 { 3064 if (tysize(e.Ety) <= REGSIZE && tysize(s.Stype.Tty) == 2 * REGSIZE) 3065 *pretregs &= mPSW | (s.Sregm & mLSW); 3066 else 3067 *pretregs &= mPSW | s.Sregm; 3068 } 3069 goto case OPconst; 3070 3071 case OPconst: 3072 if (*pretregs == 0 && (e.Ecount >= 3 || e.Ety & mTYvolatile)) 3073 { 3074 switch (tybasic(e.Ety)) 3075 { 3076 case TYbool: 3077 case TYchar: 3078 case TYschar: 3079 case TYuchar: 3080 *pretregs |= BYTEREGS; 3081 break; 3082 3083 case TYnref: 3084 case TYnptr: 3085 case TYsptr: 3086 case TYcptr: 3087 case TYfgPtr: 3088 case TYimmutPtr: 3089 case TYsharePtr: 3090 case TYrestrictPtr: 3091 *pretregs |= I16 ? 
IDXREGS : ALLREGS; 3092 break; 3093 3094 case TYshort: 3095 case TYushort: 3096 case TYint: 3097 case TYuint: 3098 case TYlong: 3099 case TYulong: 3100 case TYllong: 3101 case TYullong: 3102 case TYcent: 3103 case TYucent: 3104 case TYfptr: 3105 case TYhptr: 3106 case TYvptr: 3107 *pretregs |= ALLREGS; 3108 break; 3109 3110 default: 3111 break; 3112 } 3113 } 3114 loaddata(cdb,e,pretregs); 3115 break; 3116 } 3117 cssave(e,*pretregs,!OTleaf(op)); 3118 L1: 3119 if (!(constflag & 2)) 3120 freenode(e); 3121 3122 debug if (debugw) 3123 { 3124 printf("-codelem(e=%p,*pretregs=%s) ",e,regm_str(*pretregs)); 3125 WROP(op); 3126 printf("msavereg=%s regcon.cse.mval=%s regcon.cse.mops=%s\n", 3127 regm_str(msavereg),regm_str(regcon.cse.mval),regm_str(regcon.cse.mops)); 3128 } 3129 } 3130 3131 /******************************* 3132 * Same as codelem(), but do not destroy the registers in keepmsk. 3133 * Use scratch registers as much as possible, then use stack. 3134 * Input: 3135 * constflag true if user of result will not modify the 3136 * registers returned in *pretregs. 
3137 */ 3138 3139 void scodelem(ref CodeBuilder cdb, elem *e,regm_t *pretregs,regm_t keepmsk,bool constflag) 3140 { 3141 regm_t touse; 3142 3143 debug if (debugw) 3144 printf("+scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n", 3145 e,regm_str(*pretregs),regm_str(keepmsk),constflag); 3146 3147 elem_debug(e); 3148 if (constflag) 3149 { 3150 regm_t regm; 3151 reg_t reg; 3152 3153 if (isregvar(e,®m,®) && // if e is a register variable 3154 (regm & *pretregs) == regm && // in one of the right regs 3155 e.EV.Voffset == 0 3156 ) 3157 { 3158 uint sz1 = tysize(e.Ety); 3159 uint sz2 = tysize(e.EV.Vsym.Stype.Tty); 3160 if (sz1 <= REGSIZE && sz2 > REGSIZE) 3161 regm &= mLSW | XMMREGS; 3162 fixresult(cdb,e,regm,pretregs); 3163 cssave(e,regm,0); 3164 freenode(e); 3165 3166 debug if (debugw) 3167 printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n", 3168 e,regm_str(*pretregs),regm_str(keepmsk),constflag); 3169 3170 return; 3171 } 3172 } 3173 regm_t overlap = msavereg & keepmsk; 3174 msavereg |= keepmsk; /* add to mask of regs to save */ 3175 regm_t oldregcon = regcon.cse.mval; 3176 regm_t oldregimmed = regcon.immed.mval; 3177 regm_t oldmfuncreg = mfuncreg; /* remember old one */ 3178 mfuncreg = (XMMREGS | mBP | mES | ALLREGS) & ~regcon.mvar; 3179 uint stackpushsave = stackpush; 3180 char calledafuncsave = calledafunc; 3181 calledafunc = 0; 3182 CodeBuilder cdbx; cdbx.ctor(); 3183 codelem(cdbx,e,pretregs,constflag); // generate code for the elem 3184 3185 regm_t tosave = keepmsk & ~msavereg; /* registers to save */ 3186 if (tosave) 3187 { 3188 cgstate.stackclean++; 3189 genstackclean(cdbx,stackpush - stackpushsave,*pretregs | msavereg); 3190 cgstate.stackclean--; 3191 } 3192 3193 /* Assert that no new CSEs are generated that are not reflected */ 3194 /* in mfuncreg. 
*/ 3195 debug if ((mfuncreg & (regcon.cse.mval & ~oldregcon)) != 0) 3196 printf("mfuncreg %s, regcon.cse.mval %s, oldregcon %s, regcon.mvar %s\n", 3197 regm_str(mfuncreg),regm_str(regcon.cse.mval),regm_str(oldregcon),regm_str(regcon.mvar)); 3198 3199 assert((mfuncreg & (regcon.cse.mval & ~oldregcon)) == 0); 3200 3201 /* bugzilla 3521 3202 * The problem is: 3203 * reg op (reg = exp) 3204 * where reg must be preserved (in keepregs) while the expression to be evaluated 3205 * must change it. 3206 * The only solution is to make this variable not a register. 3207 */ 3208 if (regcon.mvar & tosave) 3209 { 3210 //elem_print(e); 3211 //printf("test1: regcon.mvar %s tosave %s\n", regm_str(regcon.mvar), regm_str(tosave)); 3212 cgreg_unregister(regcon.mvar & tosave); 3213 } 3214 3215 /* which registers can we use to save other registers in? */ 3216 if (config.flags4 & CFG4space || // if optimize for space 3217 config.target_cpu >= TARGET_80486) // PUSH/POP ops are 1 cycle 3218 touse = 0; // PUSH/POP pairs are always shorter 3219 else 3220 { 3221 touse = mfuncreg & allregs & ~(msavereg | oldregcon | regcon.cse.mval); 3222 /* Don't use registers we'll have to save/restore */ 3223 touse &= ~(fregsaved & oldmfuncreg); 3224 /* Don't use registers that have constant values in them, since 3225 the code generated might have used the value. 
3226 */ 3227 touse &= ~oldregimmed; 3228 } 3229 3230 CodeBuilder cdbs1; cdbs1.ctor(); 3231 code *cs2 = null; 3232 int adjesp = 0; 3233 3234 for (uint i = 0; tosave; i++) 3235 { 3236 regm_t mi = mask(i); 3237 3238 assert(i < REGMAX); 3239 if (mi & tosave) /* i = register to save */ 3240 { 3241 if (touse) /* if any scratch registers */ 3242 { 3243 uint j; 3244 for (j = 0; j < 8; j++) 3245 { 3246 regm_t mj = mask(j); 3247 3248 if (touse & mj) 3249 { 3250 genmovreg(cdbs1,j,i); 3251 cs2 = cat(genmovreg(i,j),cs2); 3252 touse &= ~mj; 3253 mfuncreg &= ~mj; 3254 regcon.used |= mj; 3255 break; 3256 } 3257 } 3258 assert(j < 8); 3259 } 3260 else // else use memory 3261 { 3262 CodeBuilder cdby; cdby.ctor(); 3263 uint size = gensaverestore(mask(i), cdbs1, cdby); 3264 cs2 = cat(cdby.finish(),cs2); 3265 if (size) 3266 { 3267 stackchanged = 1; 3268 adjesp += size; 3269 } 3270 } 3271 getregs(cdbx,mi); 3272 tosave &= ~mi; 3273 } 3274 } 3275 CodeBuilder cdbs2; cdbs2.ctor(); 3276 if (adjesp) 3277 { 3278 // If this is done an odd number of times, it 3279 // will throw off the 8 byte stack alignment. 3280 // We should *only* worry about this if a function 3281 // was called in the code generation by codelem(). 
3282 int sz = -(adjesp & (STACKALIGN - 1)) & (STACKALIGN - 1); 3283 if (calledafunc && !I16 && sz && (STACKALIGN >= 16 || config.flags4 & CFG4stackalign)) 3284 { 3285 regm_t mval_save = regcon.immed.mval; 3286 regcon.immed.mval = 0; // prevent reghasvalue() optimizations 3287 // because c hasn't been executed yet 3288 cod3_stackadj(cdbs1, sz); 3289 regcon.immed.mval = mval_save; 3290 cdbs1.genadjesp(sz); 3291 3292 cod3_stackadj(cdbs2, -sz); 3293 cdbs2.genadjesp(-sz); 3294 } 3295 cdbs2.append(cs2); 3296 3297 3298 cdbs1.genadjesp(adjesp); 3299 cdbs2.genadjesp(-adjesp); 3300 } 3301 else 3302 cdbs2.append(cs2); 3303 3304 calledafunc |= calledafuncsave; 3305 msavereg &= ~keepmsk | overlap; /* remove from mask of regs to save */ 3306 mfuncreg &= oldmfuncreg; /* update original */ 3307 3308 debug if (debugw) 3309 printf("-scodelem(e=%p *pretregs=%s keepmsk=%s constflag=%d\n", 3310 e,regm_str(*pretregs),regm_str(keepmsk),constflag); 3311 3312 cdb.append(cdbs1); 3313 cdb.append(cdbx); 3314 cdb.append(cdbs2); 3315 return; 3316 } 3317 3318 /********************************************* 3319 * Turn register mask into a string suitable for printing. 
3320 */ 3321 3322 const(char)* regm_str(regm_t rm) 3323 { 3324 enum NUM = 10; 3325 enum SMAX = 128; 3326 __gshared char[SMAX + 1][NUM] str; 3327 __gshared int i; 3328 3329 if (rm == 0) 3330 return "0"; 3331 if (rm == ALLREGS) 3332 return "ALLREGS"; 3333 if (rm == BYTEREGS) 3334 return "BYTEREGS"; 3335 if (rm == allregs) 3336 return "allregs"; 3337 if (rm == XMMREGS) 3338 return "XMMREGS"; 3339 char *p = str[i].ptr; 3340 if (++i == NUM) 3341 i = 0; 3342 *p = 0; 3343 for (size_t j = 0; j < 32; j++) 3344 { 3345 if (mask(cast(uint)j) & rm) 3346 { 3347 strcat(p,regstring[j]); 3348 rm &= ~mask(cast(uint)j); 3349 if (rm) 3350 strcat(p,"|"); 3351 } 3352 } 3353 if (rm) 3354 { char *s = p + strlen(p); 3355 sprintf(s,"x%02x",rm); 3356 } 3357 assert(strlen(p) <= SMAX); 3358 return strdup(p); 3359 } 3360 3361 /********************************* 3362 * Scan down comma-expressions. 3363 * Output: 3364 * *pe = first elem down right side that is not an OPcomma 3365 * Returns: 3366 * code generated for left branches of comma-expressions 3367 */ 3368 3369 void docommas(ref CodeBuilder cdb,elem **pe) 3370 { 3371 uint stackpushsave = stackpush; 3372 int stackcleansave = cgstate.stackclean; 3373 cgstate.stackclean = 0; 3374 elem* e = *pe; 3375 while (1) 3376 { 3377 if (configv.addlinenumbers && e.Esrcpos.Slinnum) 3378 { 3379 cdb.genlinnum(e.Esrcpos); 3380 //e.Esrcpos.Slinnum = 0; // don't do it twice 3381 } 3382 if (e.Eoper != OPcomma) 3383 break; 3384 regm_t retregs = 0; 3385 codelem(cdb,e.EV.E1,&retregs,true); 3386 elem* eold = e; 3387 e = e.EV.E2; 3388 freenode(eold); 3389 } 3390 *pe = e; 3391 assert(cgstate.stackclean == 0); 3392 cgstate.stackclean = stackcleansave; 3393 genstackclean(cdb,stackpush - stackpushsave,0); 3394 } 3395 3396 /************************** 3397 * For elems in regcon that don't match regconsave, 3398 * clear the corresponding bit in regcon.cse.mval. 3399 * Do same for regcon.immed. 
3400 */ 3401 3402 void andregcon(con_t *pregconsave) 3403 { 3404 regm_t m = ~1; 3405 for (int i = 0; i < REGMAX; i++) 3406 { 3407 if (pregconsave.cse.value[i] != regcon.cse.value[i]) 3408 regcon.cse.mval &= m; 3409 if (pregconsave.immed.value[i] != regcon.immed.value[i]) 3410 regcon.immed.mval &= m; 3411 m <<= 1; 3412 m |= 1; 3413 } 3414 //printf("regcon.cse.mval = %s, regconsave.mval = %s ",regm_str(regcon.cse.mval),regm_str(pregconsave.cse.mval)); 3415 regcon.used |= pregconsave.used; 3416 regcon.cse.mval &= pregconsave.cse.mval; 3417 regcon.immed.mval &= pregconsave.immed.mval; 3418 regcon.params &= pregconsave.params; 3419 //printf("regcon.cse.mval®con.cse.mops = %s, regcon.cse.mops = %s\n",regm_str(regcon.cse.mval & regcon.cse.mops), regm_str(regcon.cse.mops)); 3420 regcon.cse.mops &= regcon.cse.mval; 3421 } 3422 3423 }