/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1984-1998 by Symantec
 *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod2.d, backend/cod2.d)
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod2.d
 */

module dmd.backend.cod2;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.oper;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

extern (C++):

nothrow:

int REGSIZE();

extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl;

__gshared int cdcmp_flag;

/*******************************
 * Returns: a mask with only bit number `m` set.
 */
private extern (D) uint mask(uint m)
{
    return 1 << m;
}

// from divcoeff.c
extern (C)
{
    bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
    bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost);
}

/*******************************
 * Exchange the two registers that `a` and `b` point to.
 */

private void swap(reg_t *a,reg_t *b)
{
    const saved = *b;
    *b = *a;
    *a = saved;
}


/*******************************************
 * Returns: true if cannot use this EA in anything other than a MOV instruction.
77 */ 78 79 bool movOnly(const elem *e) 80 { 81 if (config.exe & EX_OSX64 && config.flags3 & CFG3pic && e.Eoper == OPvar) 82 { 83 const s = e.EV.Vsym; 84 // Fixups for these can only be done with a MOV 85 if (s.Sclass == SCglobal || s.Sclass == SCextern || 86 s.Sclass == SCcomdat || s.Sclass == SCcomdef) 87 return true; 88 } 89 return false; 90 } 91 92 /******************************** 93 * Determine index registers used by addressing mode. 94 * Index is rm of modregrm field. 95 * Returns: 96 * mask of index registers 97 */ 98 99 regm_t idxregm(const code* c) 100 { 101 const rm = c.Irm; 102 regm_t idxm; 103 if ((rm & 0xC0) != 0xC0) /* if register is not the destination */ 104 { 105 if (I16) 106 { 107 static immutable ubyte[8] idxrm = [mBX|mSI,mBX|mDI,mSI,mDI,mSI,mDI,0,mBX]; 108 idxm = idxrm[rm & 7]; 109 } 110 else 111 { 112 if ((rm & 7) == 4) /* if sib byte */ 113 { 114 const sib = c.Isib; 115 reg_t idxreg = (sib >> 3) & 7; 116 // scaled index reg 117 idxm = mask(idxreg | ((c.Irex & REX_X) ? 8 : 0)); 118 119 if ((sib & 7) == 5 && (rm & 0xC0) == 0) 120 { } 121 else 122 idxm |= mask((sib & 7) | ((c.Irex & REX_B) ? 8 : 0)); 123 } 124 else 125 idxm = mask((rm & 7) | ((c.Irex & REX_B) ? 8 : 0)); 126 } 127 } 128 return idxm; 129 } 130 131 132 static if (TARGET_WINDOS) 133 { 134 /*************************** 135 * Gen code for call to floating point routine. 
136 */ 137 138 void opdouble(ref CodeBuilder cdb, elem *e,regm_t *pretregs,uint clib) 139 { 140 if (config.inline8087) 141 { 142 orth87(cdb,e,pretregs); 143 return; 144 } 145 146 regm_t retregs1,retregs2; 147 if (tybasic(e.EV.E1.Ety) == TYfloat) 148 { 149 clib += CLIB.fadd - CLIB.dadd; /* convert to float operation */ 150 retregs1 = FLOATREGS; 151 retregs2 = FLOATREGS2; 152 } 153 else 154 { 155 if (I32) 156 { retregs1 = DOUBLEREGS_32; 157 retregs2 = DOUBLEREGS2_32; 158 } 159 else 160 { retregs1 = mSTACK; 161 retregs2 = DOUBLEREGS_16; 162 } 163 } 164 165 codelem(cdb,e.EV.E1, &retregs1,false); 166 if (retregs1 & mSTACK) 167 cgstate.stackclean++; 168 scodelem(cdb,e.EV.E2, &retregs2, retregs1 & ~mSTACK, false); 169 if (retregs1 & mSTACK) 170 cgstate.stackclean--; 171 callclib(cdb, e, clib, pretregs, 0); 172 } 173 } 174 175 /***************************** 176 * Handle operators which are more or less orthogonal 177 * ( + - & | ^ ) 178 */ 179 180 void cdorth(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 181 { 182 //printf("cdorth(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); 183 elem *e1 = e.EV.E1; 184 elem *e2 = e.EV.E2; 185 if (*pretregs == 0) // if don't want result 186 { 187 codelem(cdb,e1,pretregs,false); // eval left leaf 188 *pretregs = 0; // in case they got set 189 codelem(cdb,e2,pretregs,false); 190 return; 191 } 192 193 const ty = tybasic(e.Ety); 194 const ty1 = tybasic(e1.Ety); 195 196 if (tyfloating(ty1)) 197 { 198 if (tyvector(ty1) || 199 config.fpxmmregs && tyxmmreg(ty1) && 200 !(*pretregs & mST0) && 201 !(*pretregs & mST01) && 202 !(ty == TYldouble || ty == TYildouble) // watch out for shrinkLongDoubleConstantIfPossible() 203 ) 204 { 205 orthxmm(cdb,e,pretregs); 206 return; 207 } 208 if (config.inline8087) 209 { 210 orth87(cdb,e,pretregs); 211 return; 212 } 213 static if (TARGET_WINDOS) 214 { 215 opdouble(cdb,e,pretregs,(e.Eoper == OPadd) ? 
CLIB.dadd 216 : CLIB.dsub); 217 return; 218 } 219 else 220 { 221 assert(0); 222 } 223 } 224 if (tyxmmreg(ty1)) 225 { 226 orthxmm(cdb,e,pretregs); 227 return; 228 } 229 230 opcode_t op1, op2; 231 uint mode; 232 __gshared int nest; 233 234 const ty2 = tybasic(e2.Ety); 235 const e2oper = e2.Eoper; 236 const sz = _tysize[ty]; 237 const isbyte = (sz == 1); 238 code_flags_t word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0; 239 bool test = false; // assume we destroyed lvalue 240 241 switch (e.Eoper) 242 { 243 case OPadd: mode = 0; 244 op1 = 0x03; op2 = 0x13; break; /* ADD, ADC */ 245 case OPmin: mode = 5; 246 op1 = 0x2B; op2 = 0x1B; break; /* SUB, SBB */ 247 case OPor: mode = 1; 248 op1 = 0x0B; op2 = 0x0B; break; /* OR , OR */ 249 case OPxor: mode = 6; 250 op1 = 0x33; op2 = 0x33; break; /* XOR, XOR */ 251 case OPand: mode = 4; 252 op1 = 0x23; op2 = 0x23; /* AND, AND */ 253 if (tyreg(ty1) && 254 *pretregs == mPSW) /* if flags only */ 255 { 256 test = true; 257 op1 = 0x85; /* TEST */ 258 mode = 0; 259 } 260 break; 261 262 default: 263 assert(0); 264 } 265 op1 ^= isbyte; /* if byte operation */ 266 267 // Compute numwords, the number of words to operate on. 268 int numwords = 1; 269 if (!I16) 270 { 271 /* Cannot operate on longs and then do a 'paint' to a far */ 272 /* pointer, because far pointers are 48 bits and longs are 32. */ 273 /* Therefore, numwords can never be 2. */ 274 assert(!(tyfv(ty1) && tyfv(ty2))); 275 if (sz == 2 * REGSIZE) 276 { 277 numwords++; 278 } 279 } 280 else 281 { 282 /* If ty is a TYfptr, but both operands are long, treat the */ 283 /* operation as a long. 
*/ 284 if ((tylong(ty1) || ty1 == TYhptr) && 285 (tylong(ty2) || ty2 == TYhptr)) 286 numwords++; 287 } 288 289 // Special cases where only flags are set 290 if (test && _tysize[ty1] <= REGSIZE && 291 (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount)) 292 && !movOnly(e1) 293 ) 294 { 295 // Handle the case of (var & const) 296 if (e2.Eoper == OPconst && el_signx32(e2)) 297 { 298 code cs = void; 299 cs.Iflags = 0; 300 cs.Irex = 0; 301 getlvalue(cdb,&cs,e1,0); 302 targ_size_t value = e2.EV.Vpointer; 303 if (sz == 2) 304 value &= 0xFFFF; 305 else if (sz == 4) 306 value &= 0xFFFFFFFF; 307 reg_t reg; 308 if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,value,®)) 309 { 310 code_newreg(&cs, reg); 311 if (I64 && isbyte && reg >= 4) 312 cs.Irex |= REX; 313 } 314 else 315 { 316 if (sz == 8 && !I64) 317 { 318 assert(value == cast(int)value); // sign extend imm32 319 } 320 op1 = 0xF7; 321 cs.IEV2.Vint = cast(targ_int)value; 322 cs.IFL2 = FLconst; 323 } 324 cs.Iop = op1 ^ isbyte; 325 cs.Iflags |= word | CFpsw; 326 freenode(e1); 327 freenode(e2); 328 cdb.gen(&cs); 329 return; 330 } 331 332 // Handle (exp & reg) 333 reg_t reg; 334 regm_t retregs; 335 if (isregvar(e2,&retregs,®)) 336 { 337 code cs = void; 338 cs.Iflags = 0; 339 cs.Irex = 0; 340 getlvalue(cdb,&cs,e1,0); 341 code_newreg(&cs, reg); 342 if (I64 && isbyte && reg >= 4) 343 cs.Irex |= REX; 344 cs.Iop = op1 ^ isbyte; 345 cs.Iflags |= word | CFpsw; 346 freenode(e1); 347 freenode(e2); 348 cdb.gen(&cs); 349 return; 350 } 351 } 352 353 code cs = void; 354 cs.Iflags = 0; 355 cs.Irex = 0; 356 357 // Look for possible uses of LEA 358 if (e.Eoper == OPadd && 359 !(*pretregs & mPSW) && // flags aren't set by LEA 360 !nest && // could cause infinite recursion if e.Ecount 361 (sz == REGSIZE || (I64 && sz == 4))) // far pointers aren't handled 362 { 363 const rex = (sz == 8) ? 
REX_W : 0; 364 365 // Handle the case of (e + &var) 366 int e1oper = e1.Eoper; 367 if ((e2oper == OPrelconst && (config.target_cpu >= TARGET_Pentium || (!e2.Ecount && stackfl[el_fl(e2)]))) 368 || // LEA costs too much for simple EAs on older CPUs 369 (e2oper == OPconst && (e1.Eoper == OPcall || e1.Eoper == OPcallns) && !(*pretregs & mAX)) || 370 (!I16 && (isscaledindex(e1) || isscaledindex(e2))) || 371 (!I16 && e1oper == OPvar && e1.EV.Vsym.Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2.EV.Vsym.Sfl == FLreg))) || 372 (e2oper == OPconst && e1oper == OPeq && e1.EV.E1.Eoper == OPvar) || 373 (!I16 && (e2oper == OPrelconst || e2oper == OPconst) && !e1.Ecount && 374 (e1oper == OPmul || e1oper == OPshl) && 375 e1.EV.E2.Eoper == OPconst && 376 ssindex(e1oper,e1.EV.E2.EV.Vuns) 377 ) || 378 (!I16 && e1.Ecount) 379 ) 380 { 381 const inc = e.Ecount != 0; 382 nest += inc; 383 code csx = void; 384 getlvalue(cdb,&csx,e,0); 385 nest -= inc; 386 reg_t regx; 387 allocreg(cdb,pretregs,®x,ty); 388 csx.Iop = LEA; 389 code_newreg(&csx, regx); 390 cdb.gen(&csx); // LEA regx,EA 391 if (rex) 392 code_orrex(cdb.last(), rex); 393 return; 394 } 395 396 // Handle the case of ((e + c) + e2) 397 if (!I16 && 398 e1oper == OPadd && 399 (e1.EV.E2.Eoper == OPconst && el_signx32(e1.EV.E2) || 400 e2oper == OPconst && el_signx32(e2)) && 401 !e1.Ecount 402 ) 403 { 404 elem *ebase; 405 elem *edisp; 406 if (e2oper == OPconst && el_signx32(e2)) 407 { edisp = e2; 408 ebase = e1.EV.E2; 409 } 410 else 411 { edisp = e1.EV.E2; 412 ebase = e2; 413 } 414 415 auto e11 = e1.EV.E1; 416 regm_t retregs = *pretregs & ALLREGS; 417 if (!retregs) 418 retregs = ALLREGS; 419 int ss = 0; 420 int ss2 = 0; 421 422 // Handle the case of (((e * c1) + c2) + e2) 423 // Handle the case of (((e << c1) + c2) + e2) 424 if ((e11.Eoper == OPmul || e11.Eoper == OPshl) && 425 e11.EV.E2.Eoper == OPconst && 426 !e11.Ecount 427 ) 428 { 429 const co1 = cast(targ_size_t)el_tolong(e11.EV.E2); 430 if (e11.Eoper == OPshl) 431 { 432 
if (co1 > 3) 433 goto L13; 434 ss = cast(int)co1; 435 } 436 else 437 { 438 ss2 = 1; 439 switch (co1) 440 { 441 case 6: ss = 1; break; 442 case 12: ss = 1; ss2 = 2; break; 443 case 24: ss = 1; ss2 = 3; break; 444 case 10: ss = 2; break; 445 case 20: ss = 2; ss2 = 2; break; 446 case 40: ss = 2; ss2 = 3; break; 447 case 18: ss = 3; break; 448 case 36: ss = 3; ss2 = 2; break; 449 case 72: ss = 3; ss2 = 3; break; 450 default: 451 ss2 = 0; 452 goto L13; 453 } 454 } 455 freenode(e11.EV.E2); 456 freenode(e11); 457 e11 = e11.EV.E1; 458 L13: 459 { } 460 } 461 462 reg_t reg11; 463 regm_t regm; 464 if (e11.Eoper == OPvar && isregvar(e11,®m,®11)) 465 { 466 if (tysize(e11.Ety) <= REGSIZE) 467 retregs = mask(reg11); // only want the LSW 468 else 469 retregs = regm; 470 freenode(e11); 471 } 472 else 473 codelem(cdb,e11,&retregs,false); 474 475 regm_t rretregs = ALLREGS & ~retregs & ~mBP; 476 scodelem(cdb,ebase,&rretregs,retregs,true); 477 reg_t reg; 478 { 479 regm_t sregs = *pretregs & ~rretregs; 480 if (!sregs) 481 sregs = ALLREGS & ~rretregs; 482 allocreg(cdb,&sregs,®,ty); 483 } 484 485 assert((retregs & (retregs - 1)) == 0); // must be only one register 486 assert((rretregs & (rretregs - 1)) == 0); // must be only one register 487 488 auto reg1 = findreg(retregs); 489 const reg2 = findreg(rretregs); 490 491 if (ss2) 492 { 493 assert(reg != reg2); 494 if ((reg1 & 7) == BP) 495 { static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1]; 496 497 // IMUL reg,imm32 498 cdb.genc2(0x69,modregxrmx(3,reg,reg1),imm32[ss]); 499 } 500 else 501 { // LEA reg,[reg1*ss][reg1] 502 cdb.gen2sib(LEA,modregxrm(0,reg,4),modregrm(ss,reg1 & 7,reg1 & 7)); 503 if (reg1 & 8) 504 code_orrex(cdb.last(), REX_X | REX_B); 505 } 506 if (rex) 507 code_orrex(cdb.last(), rex); 508 reg1 = reg; 509 ss = ss2; // use *2 for scale 510 } 511 512 cs.Iop = LEA; // LEA reg,c[reg1*ss][reg2] 513 cs.Irm = modregrm(2,reg & 7,4); 514 cs.Isib = modregrm(ss,reg1 & 7,reg2 & 7); 515 assert(reg2 != BP); 516 cs.Iflags = CFoff; 517 cs.Irex 
= cast(ubyte)rex; 518 if (reg & 8) 519 cs.Irex |= REX_R; 520 if (reg1 & 8) 521 cs.Irex |= REX_X; 522 if (reg2 & 8) 523 cs.Irex |= REX_B; 524 cs.IFL1 = FLconst; 525 cs.IEV1.Vsize_t = edisp.EV.Vuns; 526 527 freenode(edisp); 528 freenode(e1); 529 cdb.gen(&cs); 530 fixresult(cdb,e,mask(reg),pretregs); 531 return; 532 } 533 } 534 535 regm_t posregs = (isbyte) ? BYTEREGS : (mES | ALLREGS | mBP); 536 regm_t retregs = *pretregs & posregs; 537 if (retregs == 0) /* if no return regs speced */ 538 /* (like if wanted flags only) */ 539 retregs = ALLREGS & posregs; // give us some 540 541 if (ty1 == TYhptr || ty2 == TYhptr) 542 { /* Generate code for add/subtract of huge pointers. 543 No attempt is made to generate very good code. 544 */ 545 retregs = (retregs & mLSW) | mDX; 546 regm_t rretregs; 547 if (ty1 == TYhptr) 548 { // hptr +- long 549 rretregs = mLSW & ~(retregs | regcon.mvar); 550 if (!rretregs) 551 rretregs = mLSW; 552 rretregs |= mCX; 553 codelem(cdb,e1,&rretregs,0); 554 retregs &= ~rretregs; 555 if (!(retregs & mLSW)) 556 retregs |= mLSW & ~rretregs; 557 558 scodelem(cdb,e2,&retregs,rretregs,true); 559 } 560 else 561 { // long + hptr 562 codelem(cdb,e1,&retregs,0); 563 rretregs = (mLSW | mCX) & ~retregs; 564 if (!(rretregs & mLSW)) 565 rretregs |= mLSW; 566 scodelem(cdb,e2,&rretregs,retregs,true); 567 } 568 getregs(cdb,rretregs | retregs); 569 const mreg = DX; 570 const lreg = findreglsw(retregs); 571 if (e.Eoper == OPmin) 572 { // negate retregs 573 cdb.gen2(0xF7,modregrm(3,3,mreg)); // NEG mreg 574 cdb.gen2(0xF7,modregrm(3,3,lreg)); // NEG lreg 575 code_orflag(cdb.last(),CFpsw); 576 cdb.genc2(0x81,modregrm(3,3,mreg),0); // SBB mreg,0 577 } 578 const lrreg = findreglsw(rretregs); 579 genregs(cdb,0x03,lreg,lrreg); // ADD lreg,lrreg 580 code_orflag(cdb.last(),CFpsw); 581 genmovreg(cdb,lrreg,CX); // MOV lrreg,CX 582 cdb.genc2(0x81,modregrm(3,2,mreg),0); // ADC mreg,0 583 genshift(cdb); // MOV CX,offset __AHSHIFT 584 cdb.gen2(0xD3,modregrm(3,4,mreg)); // SHL mreg,CL 
585 genregs(cdb,0x03,mreg,lrreg); // ADD mreg,MSREG(h) 586 fixresult(cdb,e,retregs,pretregs); 587 return; 588 } 589 590 regm_t rretregs; 591 reg_t reg; 592 if (_tysize[ty1] > REGSIZE && numwords == 1) 593 { /* The only possibilities are (TYfptr + tyword) or (TYfptr - tyword) */ 594 595 debug 596 if (_tysize[ty2] != REGSIZE) 597 { 598 printf("e = %p, e.Eoper = ",e); 599 WROP(e.Eoper); 600 printf(" e1.Ety = "); 601 WRTYxx(ty1); 602 printf(" e2.Ety = "); 603 WRTYxx(ty2); 604 printf("\n"); 605 elem_print(e); 606 } 607 608 assert(_tysize[ty2] == REGSIZE); 609 610 /* Watch out for the case here where you are going to OP reg,EA */ 611 /* and both the reg and EA use ES! Prevent this by forcing */ 612 /* reg into the regular registers. */ 613 if ((e2oper == OPind || 614 (e2oper == OPvar && el_fl(e2) == FLfardata)) && 615 !e2.Ecount) 616 { 617 retregs = ALLREGS; 618 } 619 620 codelem(cdb,e1,&retregs,test != 0); 621 reg = findreglsw(retregs); /* reg is the register with the offset*/ 622 } 623 else 624 { 625 regm_t regm; 626 627 /* if (tyword + TYfptr) */ 628 if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE) 629 { retregs = ~*pretregs & ALLREGS; 630 631 /* if retregs doesn't have any regs in it that aren't reg vars */ 632 if ((retregs & ~regcon.mvar) == 0) 633 retregs |= mAX; 634 } 635 else if (numwords == 2 && retregs & mES) 636 retregs = (retregs | mMSW) & ALLREGS; 637 638 // Determine if we should swap operands, because 639 // mov EAX,x 640 // add EAX,reg 641 // is faster than: 642 // mov EAX,reg 643 // add EAX,x 644 else if (e2oper == OPvar && 645 e1.Eoper == OPvar && 646 e.Eoper != OPmin && 647 isregvar(e1,®m,null) && 648 regm != retregs && 649 _tysize[ty1] == _tysize[ty2]) 650 { 651 elem *es = e1; 652 e1 = e2; 653 e2 = es; 654 } 655 codelem(cdb,e1,&retregs,test != 0); // eval left leaf 656 reg = findreg(retregs); 657 } 658 reg_t rreg; 659 int rval; 660 targ_size_t i; 661 switch (e2oper) 662 { 663 case OPind: /* if addressing mode */ 664 if (!e2.Ecount) /* if not CSE 
*/ 665 goto L1; /* try OP reg,EA */ 666 goto default; 667 668 default: /* operator node */ 669 L2: 670 rretregs = ALLREGS & ~retregs; 671 /* Be careful not to do arithmetic on ES */ 672 if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE && *pretregs != mPSW) 673 rretregs = *pretregs & (mES | ALLREGS | mBP) & ~retregs; 674 else if (isbyte) 675 rretregs &= BYTEREGS; 676 677 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 678 rreg = (_tysize[ty2] > REGSIZE) ? findreglsw(rretregs) : findreg(rretregs); 679 if (!test) 680 getregs(cdb,retregs); // we will trash these regs 681 if (numwords == 1) /* ADD reg,rreg */ 682 { 683 /* reverse operands to avoid moving around the segment value */ 684 if (_tysize[ty2] > REGSIZE) 685 { 686 getregs(cdb,rretregs); 687 genregs(cdb,op1,rreg,reg); 688 retregs = rretregs; // reverse operands 689 } 690 else 691 { 692 genregs(cdb,op1,reg,rreg); 693 if (!I16 && *pretregs & mPSW) 694 cdb.last().Iflags |= word; 695 } 696 if (I64 && sz == 8) 697 code_orrex(cdb.last(), REX_W); 698 if (I64 && isbyte && (reg >= 4 || rreg >= 4)) 699 code_orrex(cdb.last(), REX); 700 } 701 else /* numwords == 2 */ /* ADD lsreg,lsrreg */ 702 { 703 reg = findreglsw(retregs); 704 rreg = findreglsw(rretregs); 705 genregs(cdb,op1,reg,rreg); 706 if (e.Eoper == OPadd || e.Eoper == OPmin) 707 code_orflag(cdb.last(),CFpsw); 708 reg = findregmsw(retregs); 709 rreg = findregmsw(rretregs); 710 if (!(e2oper == OPu16_32 && // if second operand is 0 711 (op2 == 0x0B || op2 == 0x33)) // and OR or XOR 712 ) 713 genregs(cdb,op2,reg,rreg); // ADC msreg,msrreg 714 } 715 break; 716 717 case OPrelconst: 718 if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) 719 goto default; 720 if (sz != REGSIZE) 721 goto L2; 722 if (segfl[el_fl(e2)] != 3) /* if not in data segment */ 723 goto L2; 724 if (evalinregister(e2)) 725 goto L2; 726 cs.IEV2.Voffset = e2.EV.Voffset; 727 cs.IEV2.Vsym = e2.EV.Vsym; 728 cs.Iflags |= CFoff; 729 i = 0; /* no INC or DEC opcode */ 730 rval = 0; 731 
goto L3; 732 733 case OPconst: 734 if (tyfv(ty2)) 735 goto L2; 736 if (numwords == 1) 737 { 738 if (!el_signx32(e2)) 739 goto L2; 740 i = e2.EV.Vpointer; 741 if (word) 742 { 743 if (!(*pretregs & mPSW) && 744 config.flags4 & CFG4speed && 745 (e.Eoper == OPor || e.Eoper == OPxor || test || 746 (e1.Eoper != OPvar && e1.Eoper != OPind))) 747 { word = 0; 748 i &= 0xFFFF; 749 } 750 } 751 rval = reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,&rreg); 752 cs.IEV2.Vsize_t = i; 753 L3: 754 if (!test) 755 getregs(cdb,retregs); // we will trash these regs 756 op1 ^= isbyte; 757 cs.Iflags |= word; 758 if (rval) 759 { cs.Iop = op1 ^ 2; 760 mode = rreg; 761 } 762 else 763 cs.Iop = 0x81; 764 cs.Irm = modregrm(3,mode&7,reg&7); 765 if (mode & 8) 766 cs.Irex |= REX_R; 767 if (reg & 8) 768 cs.Irex |= REX_B; 769 if (I64 && sz == 8) 770 cs.Irex |= REX_W; 771 if (I64 && isbyte && (reg >= 4 || (rval && rreg >= 4))) 772 cs.Irex |= REX; 773 cs.IFL2 = cast(ubyte)((e2.Eoper == OPconst) ? FLconst : el_fl(e2)); 774 /* Modify instruction for special cases */ 775 switch (e.Eoper) 776 { 777 case OPadd: 778 { 779 int iop; 780 781 if (i == 1) 782 iop = 0; /* INC reg */ 783 else if (i == -1) 784 iop = 8; /* DEC reg */ 785 else 786 break; 787 cs.Iop = (0x40 | iop | reg) ^ isbyte; 788 if ((isbyte && *pretregs & mPSW) || I64) 789 { 790 cs.Irm = cast(ubyte)(modregrm(3,0,reg & 7) | iop); 791 cs.Iop = 0xFF; 792 } 793 break; 794 } 795 796 case OPand: 797 if (test) 798 cs.Iop = rval ? 
op1 : 0xF7; // TEST 799 break; 800 801 default: 802 break; 803 } 804 if (*pretregs & mPSW) 805 cs.Iflags |= CFpsw; 806 cs.Iop ^= isbyte; 807 cdb.gen(&cs); 808 cs.Iflags &= ~CFpsw; 809 } 810 else if (numwords == 2) 811 { 812 getregs(cdb,retregs); 813 reg = findregmsw(retregs); 814 const lsreg = findreglsw(retregs); 815 cs.Iop = 0x81; 816 cs.Irm = modregrm(3,mode,lsreg); 817 cs.IFL2 = FLconst; 818 const msw = cast(targ_int)MSREG(e2.EV.Vllong); 819 cs.IEV2.Vint = e2.EV.Vlong; 820 switch (e.Eoper) 821 { 822 case OPadd: 823 case OPmin: 824 cs.Iflags |= CFpsw; 825 break; 826 827 default: 828 break; 829 } 830 cdb.gen(&cs); 831 cs.Iflags &= ~CFpsw; 832 833 cs.Irm = cast(ubyte)((cs.Irm & modregrm(3,7,0)) | reg); 834 cs.IEV2.Vint = msw; 835 if (e.Eoper == OPadd) 836 cs.Irm |= modregrm(0,2,0); /* ADC */ 837 cdb.gen(&cs); 838 } 839 else 840 assert(0); 841 freenode(e2); 842 break; 843 844 case OPvar: 845 if (movOnly(e2)) 846 goto L2; 847 L1: 848 if (tyfv(ty2)) 849 goto L2; 850 if (!test) 851 getregs(cdb,retregs); // we will trash these regs 852 loadea(cdb,e2,&cs,op1, 853 ((numwords == 2) ? findreglsw(retregs) : reg), 854 0,retregs,retregs); 855 if (!I16 && word) 856 { if (*pretregs & mPSW) 857 code_orflag(cdb.last(),word); 858 else 859 cdb.last().Iflags &= ~cast(int)word; 860 } 861 else if (numwords == 2) 862 { 863 if (e.Eoper == OPadd || e.Eoper == OPmin) 864 code_orflag(cdb.last(),CFpsw); 865 reg = findregmsw(retregs); 866 if (!OTleaf(e2.Eoper)) 867 { getlvalue_msw(&cs); 868 cs.Iop = op2; 869 NEWREG(cs.Irm,reg); 870 cdb.gen(&cs); // ADC reg,data+2 871 } 872 else 873 loadea(cdb,e2,&cs,op2,reg,REGSIZE,retregs,0); 874 } 875 else if (I64 && sz == 8) 876 code_orrex(cdb.last(), REX_W); 877 freenode(e2); 878 break; 879 } 880 881 if (sz <= REGSIZE && *pretregs & mPSW) 882 { 883 /* If the expression is (_tls_array + ...), then the flags are not set 884 * since the linker may rewrite these instructions into something else. 
885 */ 886 if (I64 && e.Eoper == OPadd && e1.Eoper == OPvar) 887 { 888 const s = e1.EV.Vsym; 889 if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0) 890 { 891 goto L7; // don't assume flags are set 892 } 893 } 894 code_orflag(cdb.last(),CFpsw); 895 *pretregs &= ~mPSW; // flags already set 896 L7: { } 897 } 898 fixresult(cdb,e,retregs,pretregs); 899 } 900 901 902 /***************************** 903 * Handle multiply, divide, modulo and remquo. 904 * Note that modulo isn't defined for doubles. 905 */ 906 907 void cdmul(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 908 { 909 elem *e1 = e.EV.E1; 910 elem *e2 = e.EV.E2; 911 if (*pretregs == 0) // if don't want result 912 { 913 codelem(cdb,e1,pretregs,false); // eval left leaf 914 *pretregs = 0; // in case they got set 915 codelem(cdb,e2,pretregs,false); 916 return; 917 } 918 919 //printf("cdmul(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 920 const tyml = tybasic(e1.Ety); 921 const ty = tybasic(e.Ety); 922 const oper = e.Eoper; 923 924 if (tyfloating(tyml)) 925 { 926 if (tyvector(tyml) || 927 config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) && 928 !(*pretregs & mST0) && 929 !(ty == TYldouble || ty == TYildouble) && // watch out for shrinkLongDoubleConstantIfPossible() 930 !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div 931 !(ty == TYllong) // or passing to function through integer register 932 ) 933 { 934 orthxmm(cdb,e,pretregs); 935 return; 936 } 937 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 938 orth87(cdb,e,pretregs); 939 else 940 opdouble(cdb,e,pretregs,(oper == OPmul) ? 
CLIB.dmul : CLIB.ddiv); 941 942 return; 943 } 944 945 if (tyxmmreg(tyml)) 946 { 947 orthxmm(cdb,e,pretregs); 948 return; 949 } 950 951 uint lib; 952 regm_t keepregs = 0; 953 regm_t resreg; 954 ubyte op; 955 const uns = tyuns(tyml) || tyuns(e2.Ety); // 1 if uint operation, 0 if not 956 957 switch (oper) 958 { 959 case OPmul: 960 resreg = mAX; 961 op = 5 - uns; 962 lib = CLIB.lmul; 963 break; 964 965 case OPdiv: 966 resreg = mAX; 967 op = 7 - uns; 968 lib = uns ? CLIB.uldiv : CLIB.ldiv; 969 if (I32) 970 keepregs |= mSI | mDI; 971 break; 972 973 case OPmod: 974 resreg = mDX; 975 op = 7 - uns; 976 lib = uns ? CLIB.ulmod : CLIB.lmod; 977 if (I32) 978 keepregs |= mSI | mDI; 979 break; 980 981 case OPremquo: 982 resreg = mDX | mAX; 983 op = 7 - uns; 984 lib = uns ? CLIB.uldiv : CLIB.ldiv; 985 if (I32) 986 keepregs |= mSI | mDI; 987 break; 988 989 default: 990 assert(0); 991 } 992 993 regm_t retregs; 994 regm_t rretregs; 995 const isbyte = tybyte(e.Ety) != 0; 996 const sz = _tysize[tyml]; 997 if (sz <= REGSIZE) // dedicated regs for mul & div 998 { retregs = mAX; 999 // pick some other regs 1000 rretregs = isbyte ? BYTEREGS & ~mAX 1001 : ALLREGS & ~(mAX|mDX); 1002 } 1003 else 1004 { 1005 assert(sz <= 2 * REGSIZE); 1006 retregs = mDX | mAX; 1007 rretregs = mCX | mBX; // second arg 1008 } 1009 1010 reg_t rreg; 1011 int pow2; 1012 1013 const ubyte rex = (I64 && sz == 8) ? REX_W : 0; 1014 const uint grex = rex << 16; 1015 const OPER opunslng = I16 ? OPu16_32 : OPu32_64; 1016 1017 code cs = void; 1018 cs.Iflags = 0; 1019 cs.Irex = 0; 1020 1021 switch (e2.Eoper) 1022 { 1023 case OPu16_32: 1024 case OPs16_32: 1025 case OPu32_64: 1026 case OPs32_64: 1027 { 1028 if (sz != 2 * REGSIZE || oper != OPmul || e1.Eoper != e2.Eoper || 1029 e1.Ecount || e2.Ecount) 1030 goto L2; 1031 const ubyte opx = (e2.Eoper == opunslng) ? 
4 : 5; 1032 regm_t retregsx = mAX; 1033 codelem(cdb,e1.EV.E1,&retregsx,false); // eval left leaf 1034 if (e2.EV.E1.Eoper == OPvar || 1035 (e2.EV.E1.Eoper == OPind && !e2.EV.E1.Ecount) 1036 ) 1037 { 1038 loadea(cdb,e2.EV.E1,&cs,0xF7,opx,0,mAX,mAX | mDX); 1039 } 1040 else 1041 { 1042 regm_t rretregsx = ALLREGS & ~mAX; 1043 scodelem(cdb,e2.EV.E1,&rretregsx,retregs,true); // get rvalue 1044 getregs(cdb,mAX | mDX); 1045 const rregx = findreg(rretregsx); 1046 cdb.gen2(0xF7,grex | modregrmx(3,opx,rregx)); // OP AX,rregx 1047 } 1048 freenode(e.EV.E1); 1049 freenode(e2); 1050 fixresult(cdb,e,mAX | mDX,pretregs); 1051 return; 1052 } 1053 1054 case OPconst: 1055 auto d = cast(targ_size_t)el_tolong(e2); 1056 bool neg = false; 1057 const e2factor = d; 1058 if (!uns && cast(targ_llong)e2factor < 0) 1059 { neg = true; 1060 d = -d; 1061 } 1062 1063 // Multiply by a constant 1064 if (oper == OPmul && I32 && sz == REGSIZE * 2) 1065 { 1066 /* IMUL EDX,EDX,lsw 1067 IMUL reg,EAX,msw 1068 ADD reg,EDX 1069 MOV EDX,lsw 1070 MUL EDX 1071 ADD EDX,reg 1072 1073 if (msw == 0) 1074 IMUL reg,EDX,lsw 1075 MOV EDX,lsw 1076 MUL EDX 1077 ADD EDX,reg 1078 */ 1079 codelem(cdb,e1,&retregs,false); // eval left leaf 1080 regm_t scratch = allregs & ~(mAX | mDX); 1081 reg_t reg; 1082 allocreg(cdb,&scratch,®,TYint); 1083 getregs(cdb,mDX | mAX); 1084 1085 const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1)); 1086 const msw = cast(targ_int)(e2factor >> (REGSIZE * 8)); 1087 1088 if (msw) 1089 { 1090 genmulimm(cdb,DX,DX,lsw); 1091 genmulimm(cdb,reg,AX,msw); 1092 cdb.gen2(0x03,modregrm(3,reg,DX)); 1093 } 1094 else 1095 genmulimm(cdb,reg,DX,lsw); 1096 1097 movregconst(cdb,DX,lsw,0); // MOV EDX,lsw 1098 getregs(cdb,mDX); 1099 cdb.gen2(0xF7,modregrm(3,4,DX)); // MUL EDX 1100 cdb.gen2(0x03,modregrm(3,DX,reg)); // ADD EDX,reg 1101 1102 const resregx = mDX | mAX; 1103 freenode(e2); 1104 fixresult(cdb,e,resregx,pretregs); 1105 return; 1106 } 1107 1108 // Signed divide by a constant 1109 if (oper != OPmul 
&& 1110 (d & (d - 1)) && 1111 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) && 1112 config.flags4 & CFG4speed && !uns) 1113 { 1114 /* R1 / 10 1115 * 1116 * MOV EAX,m 1117 * IMUL R1 1118 * MOV EAX,R1 1119 * SAR EAX,31 1120 * SAR EDX,shpost 1121 * SUB EDX,EAX 1122 * IMUL EAX,EDX,d 1123 * SUB R1,EAX 1124 * 1125 * EDX = quotient 1126 * R1 = remainder 1127 */ 1128 assert(sz == 4 || sz == 8); 1129 1130 ulong m; 1131 int shpost; 1132 const int N = sz * 8; 1133 const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost); 1134 1135 regm_t regm = allregs & ~(mAX | mDX); 1136 codelem(cdb,e1,®m,false); // eval left leaf 1137 const reg_t reg = findreg(regm); 1138 getregs(cdb,regm | mDX | mAX); 1139 1140 /* Algorithm 5.2 1141 * if m>=2**(N-1) 1142 * q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n) 1143 * else 1144 * q = SRA(MULSH(m,n), shpost) - XSIGN(n) 1145 * if (neg) 1146 * q = -q 1147 */ 1148 const bool mgt = mhighbit || m >= (1UL << (N - 1)); 1149 movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 
0x40 : 0); // MOV EAX,m 1150 cdb.gen2(0xF7,grex | modregrmx(3,5,reg)); // IMUL R1 1151 if (mgt) 1152 cdb.gen2(0x03,grex | modregrmx(3,DX,reg)); // ADD EDX,R1 1153 getregsNoSave(mAX); // EAX no longer contains 'm' 1154 genmovreg(cdb, AX, reg); // MOV EAX,R1 1155 cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1); // SAR EAX,31 1156 if (shpost) 1157 cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost); // SAR EDX,shpost 1158 reg_t r3; 1159 if (neg && oper == OPdiv) 1160 { 1161 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB EAX,EDX 1162 r3 = AX; 1163 } 1164 else 1165 { 1166 cdb.gen2(0x2B,grex | modregrm(3,DX,AX)); // SUB EDX,EAX 1167 r3 = DX; 1168 } 1169 1170 // r3 is quotient 1171 regm_t resregx; 1172 switch (oper) 1173 { case OPdiv: 1174 resregx = mask(r3); 1175 break; 1176 1177 case OPmod: 1178 assert(reg != AX && r3 == DX); 1179 if (sz == 4 || (sz == 8 && cast(targ_long)d == d)) 1180 { 1181 cdb.genc2(0x69,grex | modregrm(3,AX,DX),d); // IMUL EAX,EDX,d 1182 } 1183 else 1184 { 1185 movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d 1186 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX)); // IMUL EAX,EDX 1187 getregsNoSave(mAX); // EAX no longer contains 'd' 1188 } 1189 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB R1,EAX 1190 resregx = regm; 1191 break; 1192 1193 case OPremquo: 1194 assert(reg != AX && r3 == DX); 1195 if (sz == 4 || (sz == 8 && cast(targ_long)d == d)) 1196 { 1197 cdb.genc2(0x69,grex | modregrm(3,AX,DX),d); // IMUL EAX,EDX,d 1198 } 1199 else 1200 { 1201 movregconst(cdb,AX,d,(sz == 8) ? 
0x40 : 0); // MOV EAX,d 1202 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX)); // IMUL EAX,EDX 1203 } 1204 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB R1,EAX 1205 genmovreg(cdb, AX, r3); // MOV EAX,r3 1206 if (neg) 1207 cdb.gen2(0xF7,grex | modregrm(3,3,AX)); // NEG EAX 1208 genmovreg(cdb, DX, reg); // MOV EDX,R1 1209 resregx = mDX | mAX; 1210 break; 1211 1212 default: 1213 assert(0); 1214 } 1215 freenode(e2); 1216 fixresult(cdb,e,resregx,pretregs); 1217 return; 1218 } 1219 1220 // Unsigned divide by a constant 1221 if (oper != OPmul && 1222 e2factor > 2 && (e2factor & (e2factor - 1)) && 1223 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) && 1224 config.flags4 & CFG4speed && uns) 1225 { 1226 assert(sz == 4 || sz == 8); 1227 1228 reg_t r3; 1229 regm_t regm; 1230 reg_t reg; 1231 ulong m; 1232 int shpre; 1233 int shpost; 1234 if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost)) 1235 { 1236 /* t1 = MULUH(m, n) 1237 * q = SRL(t1 + SRL(n - t1, 1), shpost - 1) 1238 * MOV EAX,reg 1239 * MOV EDX,m 1240 * MUL EDX 1241 * MOV EAX,reg 1242 * SUB EAX,EDX 1243 * SHR EAX,1 1244 * LEA R3,[EAX][EDX] 1245 * SHR R3,shpost-1 1246 */ 1247 assert(shpre == 0); 1248 1249 regm = allregs & ~(mAX | mDX); 1250 codelem(cdb,e1,®m,false); // eval left leaf 1251 reg = findreg(regm); 1252 getregs(cdb,mAX | mDX); 1253 genmovreg(cdb,AX,reg); // MOV EAX,reg 1254 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 
0x40 : 0); // MOV EDX,m 1255 getregs(cdb,regm | mDX | mAX); 1256 cdb.gen2(0xF7,grex | modregrmx(3,4,DX)); // MUL EDX 1257 genmovreg(cdb,AX,reg); // MOV EAX,reg 1258 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB EAX,EDX 1259 cdb.genc2(0xC1,grex | modregrm(3,5,AX),1); // SHR EAX,1 1260 regm_t regm3 = allregs; 1261 if (oper == OPmod || oper == OPremquo) 1262 { 1263 regm3 &= ~regm; 1264 if (oper == OPremquo || !el_signx32(e2)) 1265 regm3 &= ~mAX; 1266 } 1267 allocreg(cdb,®m3,&r3,TYint); 1268 cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX] 1269 if (shpost != 1) 1270 cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1); // SHR R3,shpost-1 1271 } 1272 else 1273 { 1274 /* q = SRL(MULUH(m, SRL(n, shpre)), shpost) 1275 * SHR EAX,shpre 1276 * MOV reg,m 1277 * MUL reg 1278 * SHR EDX,shpost 1279 */ 1280 regm = mAX; 1281 if (oper == OPmod || oper == OPremquo) 1282 regm = allregs & ~(mAX|mDX); 1283 codelem(cdb,e1,®m,false); // eval left leaf 1284 reg = findreg(regm); 1285 1286 if (reg != AX) 1287 { 1288 getregs(cdb,mAX); 1289 genmovreg(cdb,AX,reg); // MOV EAX,reg 1290 } 1291 if (shpre) 1292 { 1293 getregs(cdb,mAX); 1294 cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre); // SHR EAX,shpre 1295 } 1296 getregs(cdb,mDX); 1297 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0); // MOV EDX,m 1298 getregs(cdb,mDX | mAX); 1299 cdb.gen2(0xF7,grex | modregrmx(3,4,DX)); // MUL EDX 1300 if (shpost) 1301 cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost); // SHR EDX,shpost 1302 r3 = DX; 1303 } 1304 1305 switch (oper) 1306 { case OPdiv: 1307 // r3 = quotient 1308 resreg = mask(r3); 1309 break; 1310 1311 case OPmod: 1312 /* reg = original value 1313 * r3 = quotient 1314 */ 1315 assert(!(regm & mAX)); 1316 if (el_signx32(e2)) 1317 { 1318 cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor 1319 } 1320 else 1321 { 1322 assert(!(mask(r3) & mAX)); 1323 movregconst(cdb,AX,e2factor,(sz == 8) ? 
0x40 : 0); // MOV EAX,e2factor 1324 getregs(cdb,mAX); 1325 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3)); // IMUL EAX,r3 1326 } 1327 getregs(cdb,regm); 1328 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB reg,EAX 1329 resreg = regm; 1330 break; 1331 1332 case OPremquo: 1333 /* reg = original value 1334 * r3 = quotient 1335 */ 1336 assert(!(mask(r3) & (mAX|regm))); 1337 assert(!(regm & mAX)); 1338 if (el_signx32(e2)) 1339 { 1340 cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor 1341 } 1342 else 1343 { 1344 movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0); // MOV EAX,e2factor 1345 getregs(cdb,mAX); 1346 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3)); // IMUL EAX,r3 1347 } 1348 getregs(cdb,regm); 1349 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB reg,EAX 1350 genmovreg(cdb, AX, r3); // MOV EAX,r3 1351 genmovreg(cdb, DX, reg); // MOV EDX,reg 1352 resreg = mDX | mAX; 1353 break; 1354 1355 default: 1356 assert(0); 1357 } 1358 freenode(e2); 1359 fixresult(cdb,e,resreg,pretregs); 1360 return; 1361 } 1362 1363 if (sz > REGSIZE || !el_signx32(e2)) 1364 goto L2; 1365 1366 if (oper == OPmul && config.target_cpu >= TARGET_80286) 1367 { reg_t reg; 1368 int ss; 1369 1370 freenode(e2); 1371 retregs = isbyte ? BYTEREGS : ALLREGS; 1372 resreg = *pretregs & (ALLREGS | mBP); 1373 if (!resreg) 1374 resreg = retregs; 1375 1376 if (!I16) 1377 { // See if we can use an LEA instruction 1378 int ss2 = 0; 1379 int shift; 1380 1381 switch (e2factor) 1382 { 1383 case 12: ss = 1; ss2 = 2; goto L4; 1384 case 24: ss = 1; ss2 = 3; goto L4; 1385 1386 case 6: 1387 case 3: ss = 1; goto L4; 1388 1389 case 20: ss = 2; ss2 = 2; goto L4; 1390 case 40: ss = 2; ss2 = 3; goto L4; 1391 1392 case 10: 1393 case 5: ss = 2; goto L4; 1394 1395 case 36: ss = 3; ss2 = 2; goto L4; 1396 case 72: ss = 3; ss2 = 3; goto L4; 1397 1398 case 18: 1399 case 9: ss = 3; goto L4; 1400 1401 L4: 1402 { 1403 static if (1) 1404 { 1405 regm_t regm = isbyte ? 
BYTEREGS : ALLREGS; 1406 regm &= ~(mBP | mR13); // don't use EBP 1407 codelem(cdb,e.EV.E1,®m,true); 1408 uint r = findreg(regm); 1409 1410 if (ss2) 1411 { // Don't use EBP 1412 resreg &= ~(mBP | mR13); 1413 if (!resreg) 1414 resreg = retregs; 1415 } 1416 allocreg(cdb,&resreg,®,tyml); 1417 1418 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1419 modregxrmx(ss,r,r)); 1420 assert((r & 7) != BP); 1421 if (ss2) 1422 { 1423 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1424 modregxrm(ss2,reg,5)); 1425 cdb.last().IFL1 = FLconst; 1426 cdb.last().IEV1.Vint = 0; 1427 } 1428 else if (!(e2factor & 1)) // if even factor 1429 { 1430 genregs(cdb,0x03,reg,reg); // ADD reg,reg 1431 code_orrex(cdb.last(),rex); 1432 } 1433 fixresult(cdb,e,resreg,pretregs); 1434 return; 1435 } 1436 else 1437 { 1438 // Don't use EBP 1439 resreg &= ~mBP; 1440 if (!resreg) 1441 resreg = retregs; 1442 1443 codelem(cdb,e.EV.E1,&resreg,false); 1444 reg = findreg(resreg); 1445 getregs(cdb,resreg); 1446 cdb.gen2sib(LEA,modregrm(0,reg,4), 1447 modregrm(ss,reg,reg)); 1448 if (ss2) 1449 { 1450 cdb.gen2sib(LEA,modregrm(0,reg,4), 1451 modregrm(ss2,reg,5)); 1452 cdb.last().IFL1 = FLconst; 1453 cdb.last().IEV1.Vint = 0; 1454 } 1455 else if (!(e2factor & 1)) // if even factor 1456 genregs(cdb,0x03,reg,reg); // ADD reg,reg 1457 fixresult(cdb,e,resreg,pretregs); 1458 return; 1459 } 1460 } 1461 case 37: 1462 case 74: shift = 2; 1463 goto L5; 1464 case 13: 1465 case 26: shift = 0; 1466 goto L5; 1467 L5: 1468 { 1469 // Don't use EBP 1470 resreg &= ~(mBP | mR13); 1471 if (!resreg) 1472 resreg = retregs; 1473 allocreg(cdb,&resreg,®,TYint); 1474 1475 regm_t sregm = (ALLREGS & ~mR13) & ~resreg; 1476 codelem(cdb,e.EV.E1,&sregm,false); 1477 uint sreg = findreg(sregm); 1478 getregs(cdb,resreg | sregm); 1479 // LEA reg,[sreg * 4][sreg] 1480 // SHL sreg,shift 1481 // LEA reg,[sreg * 8][reg] 1482 assert((sreg & 7) != BP); 1483 assert((reg & 7) != BP); 1484 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1485 modregxrmx(2,sreg,sreg)); 1486 if 
(shift) 1487 cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); 1488 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1489 modregxrmx(3,sreg,reg)); 1490 if (!(e2factor & 1)) // if even factor 1491 { 1492 genregs(cdb,0x03,reg,reg); // ADD reg,reg 1493 code_orrex(cdb.last(),rex); 1494 } 1495 fixresult(cdb,e,resreg,pretregs); 1496 return; 1497 } 1498 1499 default: 1500 break; 1501 } 1502 } 1503 1504 scodelem(cdb,e.EV.E1,&retregs,0,true); // eval left leaf 1505 const regx = findreg(retregs); 1506 allocreg(cdb,&resreg,&rreg,e.Ety); 1507 1508 // IMUL regx,imm16 1509 cdb.genc2(0x69,grex | modregxrmx(3,rreg,regx),e2factor); 1510 fixresult(cdb,e,resreg,pretregs); 1511 return; 1512 } 1513 1514 // Special code for signed divide or modulo by power of 2 1515 if ((sz == REGSIZE || (I64 && sz == 4)) && 1516 (oper == OPdiv || oper == OPmod) && !uns && 1517 (pow2 = ispow2(e2factor)) != -1 && 1518 !(config.target_cpu < TARGET_80286 && pow2 != 1 && oper == OPdiv) 1519 ) 1520 { 1521 if (pow2 == 1 && oper == OPdiv && config.target_cpu > TARGET_80386) 1522 { 1523 // test eax,eax 1524 // jns L1 1525 // add eax,1 1526 // L1: sar eax,1 1527 1528 retregs = allregs; 1529 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1530 const reg = findreg(retregs); 1531 freenode(e2); 1532 getregs(cdb,retregs); 1533 gentstreg(cdb,reg); // TEST reg,reg 1534 code_orrex(cdb.last(), rex); 1535 code *cnop = gennop(null); 1536 genjmp(cdb,JNS,FLcode,cast(block *)cnop); // JNS cnop 1537 if (I64) 1538 { 1539 cdb.gen2(0xFF,modregrmx(3,0,reg)); // INC reg 1540 code_orrex(cdb.last(),rex); 1541 } 1542 else 1543 cdb.gen1(0x40 + reg); // INC reg 1544 cdb.append(cnop); 1545 cdb.gen2(0xD1,grex | modregrmx(3,7,reg)); // SAR reg,1 1546 resreg = retregs; 1547 fixresult(cdb,e,resreg,pretregs); 1548 return; 1549 } 1550 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1551 freenode(e2); 1552 getregs(cdb,mAX | mDX); // modify these regs 1553 cdb.gen1(0x99); // CWD 1554 code_orrex(cdb.last(), rex); 1555 if (pow2 == 1) 1556 
{ 1557 if (oper == OPdiv) 1558 { 1559 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1560 cdb.gen2(0xD1,grex | modregrm(3,7,AX)); // SAR AX,1 1561 } 1562 else // OPmod 1563 { 1564 cdb.gen2(0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1565 cdb.genc2(0x81,grex | modregrm(3,4,AX),1); // AND AX,1 1566 cdb.gen2(0x03,grex | modregrm(3,DX,AX)); // ADD DX,AX 1567 } 1568 } 1569 else 1570 { targ_ulong m; 1571 1572 m = (1 << pow2) - 1; 1573 if (oper == OPdiv) 1574 { 1575 cdb.genc2(0x81,grex | modregrm(3,4,DX),m); // AND DX,m 1576 cdb.gen2(0x03,grex | modregrm(3,AX,DX)); // ADD AX,DX 1577 // Be careful not to generate this for 8088 1578 assert(config.target_cpu >= TARGET_80286); 1579 cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2 1580 } 1581 else // OPmod 1582 { 1583 cdb.gen2(0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1584 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1585 cdb.genc2(0x81,grex | modregrm(3,4,AX),m); // AND AX,mask 1586 cdb.gen2(0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1587 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1588 resreg = mAX; 1589 } 1590 } 1591 fixresult(cdb,e,resreg,pretregs); 1592 return; 1593 } 1594 goto L2; 1595 1596 case OPind: 1597 if (!e2.Ecount) // if not CSE 1598 goto L1; // try OP reg,EA 1599 goto L2; 1600 1601 default: // OPconst and operators 1602 L2: 1603 //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg)); 1604 codelem(cdb,e1,&retregs,false); // eval left leaf 1605 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 1606 if (sz <= REGSIZE) 1607 { 1608 getregs(cdb,mAX | mDX); // trash these regs 1609 if (op == 7) // signed divide 1610 { 1611 cdb.gen1(0x99); // CWD 1612 code_orrex(cdb.last(),rex); 1613 } 1614 else if (op == 6) // uint divide 1615 { 1616 movregconst(cdb,DX,0,(sz == 8) ? 
64 : 0); // MOV DX,0 1617 getregs(cdb,mDX); 1618 } 1619 rreg = findreg(rretregs); 1620 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,op,rreg)); // OP AX,rreg 1621 if (I64 && isbyte && rreg >= 4) 1622 code_orrex(cdb.last(), REX); 1623 fixresult(cdb,e,resreg,pretregs); 1624 } 1625 else if (sz == 2 * REGSIZE) 1626 { 1627 if (config.target_cpu >= TARGET_PentiumPro && oper == OPmul) 1628 { 1629 /* IMUL ECX,EAX 1630 IMUL EDX,EBX 1631 ADD ECX,EDX 1632 MUL EBX 1633 ADD EDX,ECX 1634 */ 1635 getregs(cdb,mAX|mDX|mCX); 1636 cdb.gen2(0x0FAF,modregrm(3,CX,AX)); 1637 cdb.gen2(0x0FAF,modregrm(3,DX,BX)); 1638 cdb.gen2(0x03,modregrm(3,CX,DX)); 1639 cdb.gen2(0xF7,modregrm(3,4,BX)); 1640 cdb.gen2(0x03,modregrm(3,DX,CX)); 1641 fixresult(cdb,e,mDX|mAX,pretregs); 1642 } 1643 else 1644 callclib(cdb,e,lib,pretregs,keepregs); 1645 } 1646 else 1647 assert(0); 1648 return; 1649 1650 case OPvar: 1651 L1: 1652 if (!I16 && sz <= REGSIZE) 1653 { 1654 if (oper == OPmul && sz > 1) // no byte version 1655 { 1656 // Generate IMUL r32,r/m32 1657 retregs = *pretregs & (ALLREGS | mBP); 1658 if (!retregs) 1659 retregs = ALLREGS; 1660 codelem(cdb,e1,&retregs,false); // eval left leaf 1661 resreg = retregs; 1662 loadea(cdb,e2,&cs,0x0FAF,findreg(resreg),0,retregs,retregs); 1663 freenode(e2); 1664 fixresult(cdb,e,resreg,pretregs); 1665 return; 1666 } 1667 } 1668 else 1669 { 1670 if (sz == 2 * REGSIZE) 1671 { 1672 if (oper != OPmul || e.EV.E1.Eoper != opunslng || 1673 e1.Ecount) 1674 goto L2; // have to handle it with codelem() 1675 1676 retregs = ALLREGS & ~(mAX | mDX); 1677 codelem(cdb,e1.EV.E1,&retregs,false); // eval left leaf 1678 const reg = findreg(retregs); 1679 getregs(cdb,mAX); 1680 genmovreg(cdb,AX,reg); // MOV AX,reg 1681 loadea(cdb,e2,&cs,0xF7,4,REGSIZE,mAX | mDX | mskl(reg),mAX | mDX); // MUL EA+2 1682 getregs(cdb,retregs); 1683 cdb.gen1(0x90 + reg); // XCHG AX,reg 1684 getregs(cdb,mAX | mDX); 1685 if ((cs.Irm & 0xC0) == 0xC0) // if EA is a register 1686 loadea(cdb,e2,&cs,0xF7,4,0,mAX | 
mskl(reg),mAX | mDX); // MUL EA
                    else
                    {   getlvalue_lsw(&cs);
                        cdb.gen(&cs);                       // MUL EA
                    }
                    cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD DX,reg

                    freenode(e1);
                    fixresult(cdb,e,mAX | mDX,pretregs);
                    return;
                }
                assert(sz <= REGSIZE);
            }

            // loadea() handles CWD or CLR DX for divides
            codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
            loadea(cdb,e2,&cs,0xF7 ^ isbyte,op,0,
                   (oper == OPmul) ? mAX : mAX | mDX,
                   mAX | mDX);
            freenode(e2);
            fixresult(cdb,e,resreg,pretregs);
            return;
    }
    assert(0);
}


/***************************
 * Handle OPnot and OPbool.
 *
 * Several strategies are tried in order:
 *  1. e1 is a variable no larger than a register: compare it in place
 *     against a constant, then either SETcc (80486+, byte result) or
 *     the SBB sequence at L2.
 *  2. 80486+ with a byte result: evaluate e1 into the flags, then SETcc.
 *  3. NEG reg / SBB reg,reg followed by INC (OPnot) or NEG (OPbool).
 *  4. Otherwise branch on e1 with logexp() and generate:
 *      c:      [evaluate e1]
 *      cfalse: [save reg code]
 *              clr     reg
 *              jmp     cnop
 *      ctrue:  [save reg code]
 *              clr     reg
 *              inc     reg
 *      cnop:   nop
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the OPnot / OPbool expression
 *      pretregs = on entry, where the result is wanted (0 means the value
 *                 is not needed, mPSW alone means flags only); updated to
 *                 reflect where the result ended up
 */

void cdnot(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdnot()\n");
    reg_t reg;
    tym_t forflags;
    regm_t retregs;
    elem *e1 = e.EV.E1;

    if (*pretregs == 0)
        goto L1;
    if (*pretregs == mPSW)
    {   //assert(e.Eoper != OPnot && e.Eoper != OPbool);*/ /* should've been optimized
    L1:
        // No value, or only flags, wanted: evaluating e1 is all that is needed
        codelem(cdb,e1,pretregs,false);      // evaluate e1 for cc
        return;
    }

    OPER op = e.Eoper;
    uint sz = tysize(e1.Ety);
    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;

    if (!tyfloating(e1.Ety))
    {
        if (sz <= REGSIZE && e1.Eoper == OPvar)
        {   code cs;

            getlvalue(cdb,&cs,e1,0);
            freenode(e1);
            if (!I16 && sz == 2)
                cs.Iflags |= CFopsize;

            retregs = *pretregs & (ALLREGS | mBP);
            if (config.target_cpu >= TARGET_80486 &&
                tysize(e.Ety) == 1)
            {
                // Prefer CMP e1,reg when some register already holds 0,
                // otherwise CMP e1,imm (0x81 with /7 in the reg field)
                if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,0,&reg))
                {
                    cs.Iop = 0x39;
                    if (I64 && (sz == 1) && reg >= 4)
                        cs.Irex |= REX;
                }
                else
                {   cs.Iop = 0x81;
                    reg = 7;
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vint = 0;
                }
                cs.Iop ^= (sz == 1);                      // byte form of the opcode
                code_newreg(&cs,reg);
                cdb.gen(&cs);                             // CMP e1,0

                retregs &= BYTEREGS;
                if (!retregs)
                    retregs = BYTEREGS;
                allocreg(cdb,&retregs,&reg,TYint);

                const opcode_t iop = (op == OPbool)
                    ? 0x0F95    // SETNZ rm8
                    : 0x0F94;   // SETZ rm8
                cdb.gen2(iop, modregrmx(3,0,reg));
                if (reg >= 4)
                    code_orrex(cdb.last(), REX);
                if (op == OPbool)
                    *pretregs &= ~mPSW;
                goto L4;
            }

            // CMP e1,1 sets the carry flag exactly when e1 == 0
            if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,1,&reg))
                cs.Iop = 0x39;
            else
            {   cs.Iop = 0x81;
                reg = 7;
                cs.IFL2 = FLconst;
                cs.IEV2.Vint = 1;
            }
            if (I64 && (sz == 1) && reg >= 4)
                cs.Irex |= REX;
            cs.Iop ^= (sz == 1);                          // byte form of the opcode
            code_newreg(&cs,reg);
            cdb.gen(&cs);                                 // CMP e1,1

            allocreg(cdb,&retregs,&reg,TYint);
            // Carry here means e1 == 0, the opposite of the NEG path that
            // also reaches L2, so swap OPnot <-> OPbool to compensate.
            op ^= (OPbool ^ OPnot);                       // switch operators
            goto L2;
        }
        else if (config.target_cpu >= TARGET_80486 &&
            tysize(e.Ety) == 1)
        {
            int jop = jmpopcode(e.EV.E1);
            retregs = mPSW;
            codelem(cdb,e.EV.E1,&retregs,false);
            retregs = *pretregs & BYTEREGS;
            if (!retregs)
                retregs = BYTEREGS;
            allocreg(cdb,&retregs,&reg,TYint);

            // Reuse the condition nibble of the jump opcode for SETcc;
            // flipping the low bit inverts the condition.
            int iop = 0x0F90 | (jop & 0x0F);        // SETcc rm8
            if (op == OPnot)
                iop ^= 1;
            cdb.gen2(iop,grex | modregrmx(3,0,reg));
            if (reg >= 4)
                code_orrex(cdb.last(), REX);
            if (op == OPbool)
                *pretregs &= ~mPSW;
            goto L4;
        }
        else if (sz <= REGSIZE &&
            // NEG bytereg is too expensive
            (sz != 1 || config.target_cpu < TARGET_PentiumPro))
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (sz == 1 && !(retregs &= BYTEREGS))
                retregs = BYTEREGS;
            codelem(cdb,e.EV.E1,&retregs,false);
            reg = findreg(retregs);
            getregs(cdb,retregs);
            cdb.gen2(sz == 1 ? 0xF6 : 0xF7,grex | modregrmx(3,3,reg));   // NEG reg
            code_orflag(cdb.last(),CFpsw);
            if (!I16 && sz == SHORTSIZE)
                code_orflag(cdb.last(),CFopsize);
        L2:
            genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
            code_orrex(cdb.last(), rex);
            // reg is now 0 or -1 according to the carry flag.
            // Arriving from NEG:       reg==0 if e1==0, reg==-1 if e1!=0.
            // Arriving from CMP e1,1:  the opposite, and op was swapped to match.
            if (op == OPnot)
            {
                if (I64)
                    cdb.gen2(0xFF,grex | modregrmx(3,0,reg));    // INC reg
                else
                    cdb.gen1(0x40 + reg);                        // INC reg
            }
            else
                cdb.gen2(0xF7,grex | modregrmx(3,3,reg));    // NEG reg
            if (*pretregs & mPSW)
            {   code_orflag(cdb.last(),CFpsw);
                *pretregs &= ~mPSW;         // flags are always set anyway
            }
        L4:
            fixresult(cdb,e,retregs,pretregs);
            return;
        }
    }

    // General case: branch on e1 and materialize 0 or 1 on each path
    code *cnop = gennop(null);
    code *ctrue = gennop(null);
    logexp(cdb,e.EV.E1,op != OPnot,FLcode,ctrue);
    forflags = *pretregs & mPSW;
    if (I64 && sz == 8)
        forflags |= 64;     // NOTE(review): presumably requests the 64-bit form from movregconst - confirm
    assert(tysize(e.Ety) <= REGSIZE);               // result better be int
    CodeBuilder cdbfalse;
    cdbfalse.ctor();
    allocreg(cdbfalse,pretregs,&reg,e.Ety);         // allocate reg for result
    code *cfalse = cdbfalse.finish();
    CodeBuilder cdbtrue;
    cdbtrue.ctor();
    cdbtrue.append(ctrue);
    for (code *c1 = cfalse; c1; c1 = code_next(c1))
        cdbtrue.gen(c1);                                      // duplicate reg save code
    CodeBuilder cdbfalse2;
    cdbfalse2.ctor();
    movregconst(cdbfalse2,reg,0,forflags);                    // mov 0 into reg
    regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
    movregconst(cdbtrue,reg,1,forflags);                      // mov 1 into reg
    regcon.immed.mval &= ~mask(reg);                          // mark reg as unavail
    genjmp(cdbfalse2,JMP,FLcode,cast(block *) cnop);          // skip over ctrue
    cdb.append(cfalse);
    cdb.append(cdbfalse2);
    cdb.append(cdbtrue);
    cdb.append(cnop);
}


/************************
 * Complement operator
 */

void cdcom(ref CodeBuilder cdb,elem *e,regm_t
*pretregs)
{
    if (*pretregs == 0)
    {
        // Result not wanted: just evaluate the operand
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    tym_t tym = tybasic(e.Ety);
    int sz = _tysize[tym];
    uint rex = (I64 && sz == 8) ? REX_W : 0;
    regm_t possregs = (sz == 1) ? BYTEREGS : allregs;
    regm_t retregs = *pretregs & possregs;
    if (retregs == 0)
        retregs = possregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                // retregs will be destroyed

    // For a register-pair operand the NOT is applied to both halves,
    // most significant register first.
    const reg = (sz <= REGSIZE) ? findreg(retregs) : findregmsw(retregs);
    const op = (sz == 1) ? 0xF6 : 0xF7;
    genregs(cdb,op,2,reg);               // NOT reg
    code_orrex(cdb.last(), rex);
    if (I64 && sz == 1 && reg >= 4)
        code_orrex(cdb.last(), REX);
    if (sz == 2 * REGSIZE)
    {
        const reg2 = findreglsw(retregs);
        genregs(cdb,op,2,reg2);          // NOT reg+1
    }
    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * Bswap operator
 *
 * Only 4 byte operands are handled (asserted).
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the bswap expression; its operand is e.EV.E1
 *      pretregs = where the result is wanted; 0 means the value is unused
 */

void cdbswap(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    if (*pretregs == 0)
    {
        // Result not wanted: just evaluate the operand
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    tym_t tym = tybasic(e.Ety);
    assert(_tysize[tym] == 4);
    regm_t retregs = *pretregs & allregs;
    if (retregs == 0)
        retregs = allregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);        // retregs will be destroyed
    const reg = findreg(retregs);
    // The low 3 bits of the register number go in the opcode,
    // the 4th bit goes in REX.B
    cdb.gen2(0x0FC8 + (reg & 7),0);      // BSWAP reg
    if (reg & 8)
        code_orrex(cdb.last(), REX_B);
    fixresult(cdb,e,retregs,pretregs);
}

/*************************
 * ?: operator
 *
 * e.EV.E1 is the condition; e.EV.E2 holds the two arms (e21 : e22).
 * Three special cases are tried before the general branch/join code:
 *  - (e ? e : f)   where the condition is also the first arm
 *  - (a rel b) ? c1 : c2   with a carry based compare and constant arms,
 *    done without branches using AND/ADD (or NOT)
 *  - (e ? c : f)   where the first arm is a constant that can be loaded
 *    without disturbing the flags
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the ?: expression
 *      pretregs = where the result is wanted; updated to where it ended up
 */

void cdcond(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    con_t regconold,regconsave;
    uint stackpushold,stackpushsave;
    uint sz2;

    /* vars to save state of 8087 */
    int stackusedold,stackusedsave;
    NDP[global87.stack.length] _8087old;
    NDP[global87.stack.length] _8087save;

    //printf("cdcond(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    elem *e21 = e2.EV.E1;
    elem *e22 = e2.EV.E2;
    regm_t psw = *pretregs & mPSW;               /* save PSW bit                 */
    const op1 = e1.Eoper;
    uint sz1 = tysize(e1.Ety);
    uint rex = (I64 && sz1 == 8) ? REX_W : 0;
    uint grex = rex << 16;
    uint jop = jmpopcode(e1);

    uint jop1 = jmpopcode(e21);
    uint jop2 = jmpopcode(e22);

    docommas(cdb,&e1);
    cgstate.stackclean++;

    if (!OTrel(op1) && e1 == e21 &&
        sz1 <= REGSIZE && !tyfloating(e1.Ety))
    {   // Recognize (e ? e : f)

        code *cnop1 = gennop(null);
        regm_t retregs = *pretregs | mPSW;
        codelem(cdb,e1,&retregs,false);

        cse_flush(cdb,1);                // flush CSEs to memory
        genjmp(cdb,jop,FLcode,cast(block *)cnop1);
        freenode(e21);

        regconsave = regcon;
        stackpushsave = stackpush;

        retregs |= psw;
        if (retregs & (mBP | ALLREGS))
            regimmed_set(findreg(retregs),0);
        codelem(cdb,e22,&retregs,false);

        andregcon(&regconsave);
        assert(stackpushsave == stackpush);

        *pretregs = retregs;
        freenode(e2);
        cdb.append(cnop1);
        cgstate.stackclean--;
        return;
    }

    if (OTrel(op1) && sz1 <= REGSIZE && tysize(e2.Ety) <= REGSIZE &&
        !e1.Ecount &&
        (jop == JC || jop == JNC) &&
        (sz2 = tysize(e2.Ety)) <= REGSIZE &&
        e21.Eoper == OPconst &&
        e22.Eoper == OPconst
       )
    {
        regm_t retregs;
        targ_size_t v1,v2;

        if (sz2 != 1 || I64)
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (!retregs)
                retregs = ALLREGS;
        }
        else
        {
            retregs = *pretregs & BYTEREGS;
            if (!retregs)
                retregs = BYTEREGS;
        }

        // NOTE(review): the flag stays set if neither constant check below
        // passes and we fall out of this block without evaluating e1 here -
        // confirm the next compare that is generated consumes it harmlessly.
        cdcmp_flag = 1;
        v1 = cast(targ_size_t)e21.EV.Vllong;
        v2 = cast(targ_size_t)e22.EV.Vllong;
        if (jop == JNC)
        {   // swap the two arms
            v1 = v2;
            v2 = cast(targ_size_t)e21.EV.Vllong;
        }

        // Sign extend the constants from the size of the result
        opcode_t opcode = 0x81;
        switch (sz2)
        {   case 1:     opcode--;
                        v1 = cast(byte) v1;
                        v2 = cast(byte) v2;
                        break;

            case 2:     v1 = cast(short) v1;
                        v2 = cast(short) v2;
                        break;

            case 4:     v1 = cast(int) v1;
                        v2 = cast(int) v2;
                        break;
            default:
                        break;
        }

        if (I64 && v1 != cast(targ_ullong)cast(targ_ulong)v1)
        {
            // only zero-extension from 32-bits is available for 'or'
        }
        else if (I64 && cast(targ_llong)v2 != cast(targ_llong)cast(targ_long)v2)
        {
            // only sign-extension from 32-bits is available for 'and'
        }
        else
        {
            // Presumably, with cdcmp_flag set, this leaves 0 or -1 in reg;
            // (reg & (v1 - v2)) + v2 then selects v1 or v2 without a branch.
            codelem(cdb,e1,&retregs,false);
            const reg = findreg(retregs);

            if (v1 == 0 && v2 == ~cast(targ_size_t)0)
            {
                cdb.gen2(0xF6 + (opcode & 1),grex | modregrmx(3,2,reg));  // NOT reg
                if (I64 && sz2 == REGSIZE)
                    code_orrex(cdb.last(), REX_W);
            }
            else
            {
                v1 -= v2;
                cdb.genc2(opcode,grex | modregrmx(3,4,reg),v1);   // AND reg,v1-v2
                if (I64 && sz2 == 1 && reg >= 4)
                    code_orrex(cdb.last(), REX);
                if (v2 == 1 && !I64)
                    cdb.gen1(0x40 + reg);                     // INC reg
                else if (v2 == -1L && !I64)
                    cdb.gen1(0x48 + reg);                     // DEC reg
                else
                {   cdb.genc2(opcode,grex | modregrmx(3,0,reg),v2);   // ADD reg,v2
                    if (I64 && sz2 == 1 && reg >= 4)
                        code_orrex(cdb.last(), REX);
                }
            }

            freenode(e21);
            freenode(e22);
            freenode(e2);

            fixresult(cdb,e,retregs,pretregs);
            cgstate.stackclean--;
            return;
        }
    }

    if (op1 != OPcond && op1 != OPandand && op1 != OPoror &&
        op1 != OPnot && op1 != OPbool &&
        e21.Eoper == OPconst &&
        sz1 <= REGSIZE &&
        *pretregs & (mBP | ALLREGS) &&
        tysize(e21.Ety) <= REGSIZE && !tyfloating(e21.Ety))
    {   // Recognize (e ? c : f)

        code *cnop1 = gennop(null);
        regm_t retregs = mPSW;
        jop = jmpopcode(e1);            // get jmp condition
        codelem(cdb,e1,&retregs,false);

        // Set the register with e21 without affecting the flags
        retregs = *pretregs & (ALLREGS | mBP);
        if (retregs & ~regcon.mvar)
            retregs &= ~regcon.mvar;    // don't disturb register variables
        // NOTE: see my email (sign extension bug? possible fix, some questions
        reg_t reg;
        regwithvalue(cdb,retregs,cast(targ_size_t)e21.EV.Vllong,&reg,tysize(e21.Ety) == 8 ? 64|8 : 8);
        retregs = mask(reg);

        cse_flush(cdb,1);                // flush CSE's to memory
        genjmp(cdb,jop,FLcode,cast(block *)cnop1);
        freenode(e21);

        regconsave = regcon;
        stackpushsave = stackpush;

        codelem(cdb,e22,&retregs,false);

        andregcon(&regconsave);
        assert(stackpushsave == stackpush);

        freenode(e2);
        cdb.append(cnop1);
        fixresult(cdb,e,retregs,pretregs);
        cgstate.stackclean--;
        return;
    }

    // General case: generate each arm separately, restoring the register,
    // stack and 8087 bookkeeping to the same starting point for both.
    code *cnop1 = gennop(null);
    code *cnop2 = gennop(null);         // dummy target addresses
    logexp(cdb,e1,false,FLcode,cnop1);  // evaluate condition
    regconold = regcon;
    stackusedold = global87.stackused;
    stackpushold = stackpush;
    memcpy(_8087old.ptr,global87.stack.ptr,global87.stack.sizeof);
    regm_t retregs = *pretregs;
    CodeBuilder cdb1;
    cdb1.ctor();
    if (psw && jop1 != JNE)
    {
        retregs &= ~mPSW;
        if (!retregs)
            retregs = ALLREGS;
        codelem(cdb1,e21,&retregs,false);
        fixresult(cdb1,e21,retregs,pretregs);
    }
    else
        codelem(cdb1,e21,&retregs,false);

    if (CPP && e2.Eoper == OPcolon2)
    {
        code cs;

        // This is necessary so that any cleanup code on one branch
        // is redone on the other branch.
        cs.Iop = ESCAPE | ESCmark2;
        cs.Iflags = 0;
        cs.Irex = 0;
        cdb.gen(&cs);
        cdb.append(cdb1);
        cs.Iop = ESCAPE | ESCrelease2;
        cdb.gen(&cs);
    }
    else
        cdb.append(cdb1);

    // Remember the state after the first arm, then rewind to the state
    // that held right after the condition for generating the second arm
    regconsave = regcon;
    regcon = regconold;

    stackpushsave = stackpush;
    stackpush = stackpushold;

    stackusedsave = global87.stackused;
    global87.stackused = stackusedold;

    memcpy(_8087save.ptr,global87.stack.ptr,global87.stack.sizeof);
    memcpy(global87.stack.ptr,_8087old.ptr,global87.stack.sizeof);

    retregs |= psw;                     // PSW bit may have been trashed
    CodeBuilder cdb2;
    cdb2.ctor();
    if (psw && jop2 != JNE)
    {
        retregs &= ~mPSW;
        if (!retregs)
            retregs = ALLREGS;
        codelem(cdb2,e22,&retregs,false);
        fixresult(cdb2,e22,retregs,pretregs);
    }
    else
        codelem(cdb2,e22,&retregs,false);   // use same regs as E1
    *pretregs = retregs | psw;
    andregcon(&regconold);
    andregcon(&regconsave);
    // Both arms must leave the 8087 stack and the CPU stack at the same depth
    assert(global87.stackused == stackusedsave);
    assert(stackpush == stackpushsave);
    memcpy(global87.stack.ptr,_8087save.ptr,global87.stack.sizeof);
    freenode(e2);
    genjmp(cdb,JMP,FLcode,cast(block *) cnop2);
    cdb.append(cnop1);
    cdb.append(cdb2);
    cdb.append(cnop2);
    if (*pretregs & mST0)
        note87(e,0,0);

    cgstate.stackclean--;
}

/*********************
 * Comma operator OPcomma
 *
 * The left operand is evaluated only for its side effects; the right
 * operand supplies the result.
 */

void cdcomma(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs = 0;
    codelem(cdb,e.EV.E1,&retregs,false);   // ignore value from left leaf
    codelem(cdb,e.EV.E2,pretregs,false);   // do right leaf
}


/*********************************
 * Do && and || operators.
* Generate:
 *              (evaluate e1 and e2, if true goto cnop1)
 *      cnop3:  NOP
 *      cg:     [save reg code]         ;if we must preserve reg
 *              CLR     reg             ;false result (set Z also)
 *              JMP     cnop2
 *
 *      cnop1:  NOP                     ;if e1 evaluates to true
 *              [save reg code]         ;preserve reg
 *
 *              MOV     reg,1           ;true result
 *                  or
 *              CLR     reg             ;if return result in flags
 *              INC     reg
 *
 *      cnop2:  NOP                     ;mark end of code
 *
 * When the result is not wanted, only the control flow is generated.
 * When e2 is a call returning a bool of the same size as the result and
 * flags are not wanted, the value returned by e2 is used directly and
 * only the short-circuit path loads a constant.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the OPandand / OPoror expression
 *      pretregs = where the result is wanted (0 if unused); set to the
 *                 register mask actually holding the result
 */

void cdloglog(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* We can trip the assert with the following:
     *    if ( (b<=a) ? (c<b || a<=c) : c>=a )
     * We'll generate ugly code for it, but it's too obscure a case
     * to expend much effort on it.
     * assert(*pretregs != mPSW);
     */

    cgstate.stackclean++;
    code *cnop1 = gennop(null);
    CodeBuilder cdb1;
    cdb1.ctor();
    cdb1.append(cnop1);
    code *cnop3 = gennop(null);
    elem *e2 = e.EV.E2;
    // ||: jump to cnop1 as soon as e1 is true
    // &&: jump to cnop3 as soon as e1 is false
    (e.Eoper == OPoror)
        ? logexp(cdb,e.EV.E1,1,FLcode,cnop1)
        : logexp(cdb,e.EV.E1,0,FLcode,cnop3);
    con_t regconsave = regcon;
    uint stackpushsave = stackpush;
    if (*pretregs == 0)                 // if don't want result
    {
        int noreturn = !el_returns(e2);
        codelem(cdb,e2,pretregs,false);
        if (noreturn)
        {
            // e2 never comes back, so only the registers it used matter
            regconsave.used |= regcon.used;
            regcon = regconsave;
        }
        else
            andregcon(&regconsave);
        assert(stackpush == stackpushsave);
        cdb.append(cnop3);
        cdb.append(cdb1);        // eval code, throw away result
        cgstate.stackclean--;
        return;
    }
    code *cnop2 = gennop(null);
    uint sz = tysize(e.Ety);
    if (tybasic(e2.Ety) == TYbool &&
      sz == tysize(e2.Ety) &&
      !(*pretregs & mPSW) &&
      e2.Eoper == OPcall)
    {
        codelem(cdb,e2,pretregs,false);

        andregcon(&regconsave);

        // stack depth should not change when evaluating E2
        assert(stackpush == stackpushsave);

        assert(sz <= 4);                                        // result better be int
        regm_t retregs = *pretregs & allregs;
        reg_t reg;
        allocreg(cdb1,&retregs,&reg,TYint);                     // allocate reg for result
        movregconst(cdb1,reg,e.Eoper == OPoror,0);              // reg = 1 for ||, 0 for &&
        regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
        *pretregs = retregs;
        if (e.Eoper == OPoror)
        {
            cdb.append(cnop3);
            genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
            cdb.append(cdb1);
            cdb.append(cnop2);
        }
        else
        {
            genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
            cdb.append(cnop3);
            cdb.append(cdb1);
            cdb.append(cnop2);
        }
        cgstate.stackclean--;
        return;
    }
    logexp(cdb,e2,1,FLcode,cnop1);
    andregcon(&regconsave);

    // stack depth should not change when evaluating E2
    assert(stackpush == stackpushsave);

    assert(sz <= 4);                                         // result better be int
    regm_t retregs = *pretregs & (ALLREGS | mBP);
    if (!retregs)
        retregs = ALLREGS;                                   // if mPSW only
    CodeBuilder cdbcg;
    cdbcg.ctor();
    reg_t reg;
    allocreg(cdbcg,&retregs,&reg,TYint);                     // allocate reg for result
    code *cg = cdbcg.finish();
    for (code *c1 = cg; c1; c1 = code_next(c1))              // for each instruction
        cdb1.gen(c1);                                        // duplicate it
    CodeBuilder cdbcg2;
    cdbcg2.ctor();
    movregconst(cdbcg2,reg,0,*pretregs & mPSW);              // MOV reg,0
    regcon.immed.mval &= ~mask(reg);                         // mark reg as unavail
    genjmp(cdbcg2, JMP,FLcode,cast(block *) cnop2);          // JMP cnop2
    movregconst(cdb1,reg,1,*pretregs & mPSW);                // reg = 1
    regcon.immed.mval &= ~mask(reg);                         // mark reg as unavail
    *pretregs = retregs;
    cdb.append(cnop3);
    cdb.append(cg);
    cdb.append(cdbcg2);
    cdb.append(cdb1);
    cdb.append(cnop2);
    cgstate.stackclean--;
}


/*********************
 * Generate code for shift left or shift right (OPshl,OPshr,OPashr,OProl,OPror).
* Params (cdshift):
*      cdb = code builder the generated instructions are appended to
*      e = shift/rotate elem (OPshl, OPshr, OPashr, OProl, OPror);
*          e.EV.E1 is the value, e.EV.E2 the count
*      pretregs = mask of registers/flags wanted for the result; 0 means the
*                 result is unwanted and only the operands are evaluated.
*                 mPSW is cleared here when the emitted code already set the flags.
*/

void cdshift(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    reg_t resreg;
    uint shiftcnt;
    regm_t retregs,rretregs;

    //printf("cdshift()\n");
    elem *e1 = e.EV.E1;
    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false); // eval left leaf
        *pretregs = 0;                  // in case they got set
        codelem(cdb,e.EV.E2,pretregs,false);
        return;
    }

    tym_t tyml = tybasic(e1.Ety);
    int sz = _tysize[tyml];
    assert(!tyfloating(tyml));
    OPER oper = e.Eoper;
    uint grex = ((I64 && sz == 8) ? REX_W : 0) << 16;

version (SCPP)
{
    // Do this until the rest of the compiler does OPshr/OPashr correctly
    if (oper == OPshr)
        oper = (tyuns(tyml)) ? OPshr : OPashr;
}

    // s1 is the /r opcode extension for the main shift, s2 the one used on
    // the other half of a register pair (rotate through carry)
    uint s1,s2;
    switch (oper)
    {
        case OPshl:
            s1 = 4;                     // SHL
            s2 = 2;                     // RCL
            break;
        case OPshr:
            s1 = 5;                     // SHR
            s2 = 3;                     // RCR
            break;
        case OPashr:
            s1 = 7;                     // SAR
            s2 = 3;                     // RCR
            break;
        case OProl:
            s1 = 0;                     // ROL
            break;
        case OPror:
            s1 = 1;                     // ROR
            break;
        default:
            assert(0);
    }

    reg_t sreg = NOREG;                   // guard against using value without assigning to sreg
    elem *e2 = e.EV.E2;
    regm_t forccs = *pretregs & mPSW;            // if return result in CCs
    regm_t forregs = *pretregs & (ALLREGS | mBP); // mask of possible return regs
    bool e2isconst = false;                    // assume for the moment
    uint isbyte = (sz == 1);
    switch (e2.Eoper)
    {
        case OPconst:
            e2isconst = true;               // e2 is a constant
            shiftcnt = e2.EV.Vint;         // get shift count
            if ((!I16 && sz <= REGSIZE) ||
                shiftcnt <= 4 ||            // if sequence of shifts
                (sz == 2 &&
                    (shiftcnt == 8 || config.target_cpu >= TARGET_80286)) ||
                (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
               )
            {
                retregs = (forregs) ? forregs
                                    : ALLREGS;
                if (isbyte)
                {   retregs &= BYTEREGS;
                    if (!retregs)
                        retregs = BYTEREGS;
                }
                else if (sz > REGSIZE && sz <= 2 * REGSIZE &&
                         !(retregs & mMSW))
                    retregs |= mMSW & ALLREGS;
                if (s1 == 7)    // if arithmetic right shift
                {
                    if (shiftcnt == 8)
                        retregs = mAX;
                    else if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
                        retregs = mDX|mAX;
                }

                if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE &&
                    oper == OPshl &&
                    !e1.Ecount &&
                    (e1.Eoper == OPs16_32 || e1.Eoper == OPu16_32 ||
                     e1.Eoper == OPs32_64 || e1.Eoper == OPu32_64)
                   )
                {   // Handle (shtlng)s << 16
                    regm_t r = retregs & mMSW;
                    codelem(cdb,e1.EV.E1,&r,false);      // eval left leaf
                    regwithvalue(cdb,retregs & mLSW,0,&resreg,0);
                    getregs(cdb,r);
                    retregs = r | mask(resreg);
                    if (forccs)
                    {   sreg = findreg(r);
                        gentstreg(cdb,sreg);
                        *pretregs &= ~mPSW;             // already set
                    }
                    freenode(e1);
                    freenode(e2);
                    break;
                }

                // See if we should use LEA reg,xxx instead of shift
                if (!I16 && shiftcnt >= 1 && shiftcnt <= 3 &&
                    (sz == REGSIZE || (I64 && sz == 4)) &&
                    oper == OPshl &&
                    e1.Eoper == OPvar &&
                    !(*pretregs & mPSW) &&
                    config.flags4 & CFG4speed
                   )
                {
                    reg_t reg;
                    regm_t regm;

                    if (isregvar(e1,&regm,&reg) && !(regm & retregs))
                    {   code cs;
                        allocreg(cdb,&retregs,&resreg,e.Ety);
                        buildEA(&cs,-1,reg,1 << shiftcnt,0);
                        cs.Iop = LEA;
                        code_newreg(&cs,resreg);
                        cs.Iflags = 0;
                        if (I64 && sz == 8)
                            cs.Irex |= REX_W;
                        cdb.gen(&cs);                   // LEA resreg,[reg * ss]
                        freenode(e1);
                        freenode(e2);
                        break;
                    }
                }

                codelem(cdb,e1,&retregs,false); // eval left leaf
                //assert((retregs & regcon.mvar) == 0);
                getregs(cdb,retregs);          // modify these regs

                {
                    if (sz == 2 * REGSIZE)
                    {   resreg = findregmsw(retregs);
                        sreg = findreglsw(retregs);
                    }
                    else
                    {   resreg = findreg(retregs);
                        sreg = NOREG;              // an invalid value
                    }
                    if (config.target_cpu >= TARGET_80286 &&
                        sz <= REGSIZE)
                    {
                        // SHL resreg,shiftcnt
                        assert(!(sz == 1 && (mask(resreg) & ~BYTEREGS)));
                        cdb.genc2(0xC1 ^ isbyte,grex | modregxrmx(3,s1,resreg),shiftcnt);
                        if (shiftcnt == 1)
                            cdb.last().Iop += 0x10;     // short form of shift
                        if (I64 && sz == 1 && resreg >= 4)
                            cdb.last().Irex |= REX;
                        // See if we need operand size prefix
                        if (!I16 && oper != OPshl && sz == 2)
                            cdb.last().Iflags |= CFopsize;
                        if (forccs)
                            cdb.last().Iflags |= CFpsw;         // need flags result
                    }
                    else if (shiftcnt == 8)
                    {   if (!(retregs & BYTEREGS) || resreg >= 4)
                        {
                            goto L1;
                        }

                        if (pass != PASSfinal && (!forregs || forregs & (mSI | mDI)))
                        {
                            // e1 might get into SI or DI in a later pass,
                            // so don't put CX into a register
                            getregs(cdb,mCX);
                        }

                        assert(sz == 2);
                        switch (oper)
                        {
                            case OPshl:
                                // MOV regH,regL        XOR regL,regL
                                assert(resreg < 4 && !grex);
                                genregs(cdb,0x8A,resreg+4,resreg);
                                genregs(cdb,0x32,resreg,resreg);
                                break;

                            case OPshr:
                            case OPashr:
                                // MOV regL,regH
                                genregs(cdb,0x8A,resreg,resreg+4);
                                if (oper == OPashr)
                                    cdb.gen1(0x98);           // CBW
                                else
                                    genregs(cdb,0x32,resreg+4,resreg+4); // CLR regH
                                break;

                            case OPror:
                            case OProl:
                                // XCHG regL,regH
                                genregs(cdb,0x86,resreg+4,resreg);
                                break;

                            default:
                                assert(0);
                        }
                        if (forccs)
                            gentstreg(cdb,resreg);
                    }
                    else if (shiftcnt == REGSIZE * 8)   // it's an lword
                    {
                        if (oper == OPshl)
                            swap(&resreg, &sreg);
                        genmovreg(cdb,sreg,resreg);  // MOV sreg,resreg
                        if (oper == OPashr)
                            cdb.gen1(0x99);                       // CWD
                        else
                            movregconst(cdb,resreg,0,0);  // MOV resreg,0
                        if (forccs)
                        {
                            gentstreg(cdb,sreg);
                            *pretregs &= mBP | ALLREGS | mES;
                        }
                    }
                    else
                    {
                        if (oper == OPshl && sz == 2 * REGSIZE)
                            swap(&resreg, &sreg);
                        while (shiftcnt--)
                        {
                            cdb.gen2(0xD1 ^ isbyte,modregrm(3,s1,resreg));
                            if (sz == 2 * REGSIZE)
                            {
                                code_orflag(cdb.last(),CFpsw);
                                cdb.gen2(0xD1,modregrm(3,s2,sreg));
                            }
                        }
                        if (forccs)
                            code_orflag(cdb.last(),CFpsw);
                    }
                    if (sz <= REGSIZE)
                        *pretregs &= mBP | ALLREGS;     // flags already set
                }
                freenode(e2);
                break;
            }
            goto default;

        default:
            retregs = forregs & ~mCX;               // CX will be shift count
            if (sz <= REGSIZE)
            {
                if (forregs & ~regcon.mvar && !(retregs & ~regcon.mvar))
                    retregs = ALLREGS & ~mCX;       // need something
                else if (!retregs)
                    retregs = ALLREGS & ~mCX;       // need something
                if (sz == 1)
                {   retregs &= mAX|mBX|mDX;
                    if (!retregs)
                        retregs = mAX|mBX|mDX;
                }
            }
            else
            {
                if (!(retregs & mMSW))
                    retregs = ALLREGS & ~mCX;
            }
            codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf

            if (sz <= REGSIZE)
                resreg = findreg(retregs);
            else
            {
                resreg = findregmsw(retregs);
                sreg = findreglsw(retregs);
            }
        L1:
            rretregs = mCX;                 // CX is shift count
            if (sz <= REGSIZE)
            {
                scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue
                getregs(cdb,retregs);      // trash these regs
                cdb.gen2(0xD3 ^ isbyte,grex | modregrmx(3,s1,resreg)); // Sxx resreg,CX

                if (!I16 && sz == 2 && (oper == OProl || oper == OPror))
                    cdb.last().Iflags |= CFopsize;

                // Note that a shift by CL does not set the flags if
                // CL == 0. If e2 is a constant, we know it isn't 0
                // (it would have been optimized out).
                if (e2isconst)
                    *pretregs &= mBP | ALLREGS; // flags already set with result
            }
            else if (sz == 2 * REGSIZE &&
                     config.target_cpu >= TARGET_80386)
            {
                reg_t hreg = resreg;
                reg_t lreg = sreg;
                uint rex = I64 ? (REX_W << 16) : 0;
                if (e2isconst)
                {
                    getregs(cdb,retregs);
                    if (shiftcnt & (REGSIZE * 8))
                    {
                        if (oper == OPshr)
                        {   //      SHR hreg,shiftcnt
                            //      MOV lreg,hreg
                            //      XOR hreg,hreg
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt - (REGSIZE * 8));
                            genmovreg(cdb,lreg,hreg);
                            movregconst(cdb,hreg,0,0);
                        }
                        else if (oper == OPashr)
                        {   //      MOV     lreg,hreg
                            //      SAR     hreg,31
                            //      SHRD    lreg,hreg,shiftcnt
                            genmovreg(cdb,lreg,hreg);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),(REGSIZE * 8) - 1);
                            cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt - (REGSIZE * 8));
                        }
                        else
                        {   //      SHL lreg,shiftcnt
                            //      MOV hreg,lreg
                            //      XOR lreg,lreg
                            cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt - (REGSIZE * 8));
                            genmovreg(cdb,hreg,lreg);
                            movregconst(cdb,lreg,0,0);
                        }
                    }
                    else
                    {
                        if (oper == OPshr || oper == OPashr)
                        {   //      SHRD    lreg,hreg,shiftcnt
                            //      SHR/SAR hreg,shiftcnt
                            cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt);
                        }
                        else
                        {   //      SHLD hreg,lreg,shiftcnt
                            //      SHL  lreg,shiftcnt
                            cdb.genc2(0x0FA4,rex | modregrm(3,lreg,hreg),shiftcnt);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt);
                        }
                    }
                    freenode(e2);
                }
                else if (config.target_cpu >= TARGET_80486 && REGSIZE == 2)
                {
                    scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
                    getregs(cdb,retregs);          // modify these regs
                    if (oper == OPshl)
                    {
                        /*
                            SHLD    hreg,lreg,CL
                            SHL     lreg,CL
                         */

                        cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
                        cdb.gen2(0xD3,modregrm(3,4,lreg));
                    }
                    else
                    {
                        /*
                            SHRD    lreg,hreg,CL
                            SAR     hreg,CL

                            -- or --

                            SHRD    lreg,hreg,CL
                            SHR     hreg,CL
                         */
                        cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        cdb.gen2(0xD3,modregrm(3,s1,hreg));
                    }
                }
                else
                {   code* cl1,cl2;

                    scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
                    getregs(cdb,retregs | mCX);     // modify these regs
                                                    // TEST CL,0x20
                    cdb.genc2(0xF6,modregrm(3,0,CX),REGSIZE * 8);
                    cl1 = gennop(null);
                    // cdb1 collects the out-of-line "count >= register width" path,
                    // which starts at cl1 and is appended after the in-line path
                    CodeBuilder cdb1;
                    cdb1.ctor();
                    cdb1.append(cl1);
                    if (oper == OPshl)
                    {
                        /*  TEST    CL,20H
                            JNE     L1
                            SHLD    hreg,lreg,CL
                            SHL     lreg,CL
                            JMP     L2
                        L1: AND     CL,20H-1
                            SHL     lreg,CL
                            MOV     hreg,lreg
                            XOR     lreg,lreg
                        L2: NOP
                         */

                        if (REGSIZE == 2)
                            cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                        cdb1.gen2(0xD3,modregrm(3,4,lreg));
                        genmovreg(cdb1,hreg,lreg);
                        genregs(cdb1,0x31,lreg,lreg);

                        genjmp(cdb,JNE,FLcode,cast(block *)cl1);
                        cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
                        cdb.gen2(0xD3,modregrm(3,4,lreg));
                    }
                    else
                    {   if (oper == OPashr)
                        {
                            /*  TEST    CL,20H
                                JNE     L1
                                SHRD    lreg,hreg,CL
                                SAR     hreg,CL
                                JMP     L2
                            L1: AND     CL,15
                                MOV     lreg,hreg
                                SAR     hreg,31
                                SHRD    lreg,hreg,CL
                            L2: NOP
                             */

                            if (REGSIZE == 2)
                                cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                            genmovreg(cdb1,lreg,hreg);
                            cdb1.genc2(0xC1,modregrm(3,s1,hreg),31);
                            cdb1.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        }
                        else
                        {
                            /*  TEST    CL,20H
                                JNE     L1
                                SHRD    lreg,hreg,CL
                                SHR     hreg,CL
                                JMP     L2
                            L1: AND     CL,15
                                SHR     hreg,CL
                                MOV     lreg,hreg
                                XOR     hreg,hreg
                            L2: NOP
                             */

                            if (REGSIZE == 2)
                                cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                            cdb1.gen2(0xD3,modregrm(3,5,hreg));
                            genmovreg(cdb1,lreg,hreg);
                            genregs(cdb1,0x31,hreg,hreg);
                        }
                        genjmp(cdb,JNE,FLcode,cast(block *)cl1);
                        cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        cdb.gen2(0xD3,modregrm(3,s1,hreg));
                    }
                    cl2 = gennop(null);
                    genjmp(cdb,JMPS,FLcode,cast(block *)cl2);
                    cdb.append(cdb1);
                    cdb.append(cl2);
                }
                break;
            }
            else if (sz == 2 * REGSIZE)
            {
                scodelem(cdb,e2,&rretregs,retregs,false);
                getregs(cdb,retregs | mCX);
                if (oper == OPshl)
                    swap(&resreg, &sreg);
                if (!e2isconst)                   // if not sure shift count != 0
                    cdb.genc2(0xE3,0,6);          // JCXZ .+6
                cdb.gen2(0xD1,modregrm(3,s1,resreg));
                code_orflag(cdb.last(),CFtarg2);
                cdb.gen2(0xD1,modregrm(3,s2,sreg));
                cdb.genc2(0xE2,0,cast(targ_uns)-6);          // LOOP .-6
                regimmed_set(CX,0);         // note that now CX == 0
            }
            else
                assert(0);
            break;
    }
    fixresult(cdb,e,retregs,pretregs);
}


/***************************
 * Perform a 'star' reference (indirection).
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the indirection elem; e.EV.E1 is the address expression
 *      pretregs = mask of registers/flags wanted for the loaded value.
 *                 0 returns without loading unless e is volatile;
 *                 exactly mPSW only sets the flags from the value.
 */

void cdind(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;
    uint nreg;

    //printf("cdind(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    tym_t tym = tybasic(e.Ety);
    if (tyfloating(tym))
    {
        if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
            if (I64 && tym == TYcfloat && *pretregs & (ALLREGS | mBP))
            { }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }

            if (*pretregs & mPSW)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
        }
    }

    elem *e1 = e.EV.E1;
    assert(e1);
    switch (tym)
    {
        case TYstruct:
        case TYarray:
            // This case should never happen, why is it here?
            tym = TYnptr;               // don't confuse allocreg()
            if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
                tym = TYfptr;
            break;

        default:
            break;
    }
    uint sz = _tysize[tym];
    uint isbyte = tybyte(tym) != 0;

    code cs;

    getlvalue(cdb,&cs,e,RMload);          // get addressing mode
    //printf("Irex = %02x, Irm = x%02x, Isib = x%02x\n", cs.Irex, cs.Irm, cs.Isib);
    //fprintf(stderr,"cd2 :\n"); WRcodlst(c);
    if (*pretregs == 0)
    {
        if (e.Ety & mTYvolatile)               // do the load anyway
            *pretregs = regmask(e.Ety, 0);     // load into registers
        else
            return;
    }

    regm_t idxregs = idxregm(&cs);               // mask of index regs used

    if (*pretregs == mPSW)
    {
        if (!I16 && tym == TYfloat)
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYfloat);
            cs.Iop = 0x8B;
            code_newreg(&cs,reg);
            cdb.gen(&cs);                       // MOV reg,lsw
            cdb.gen2(0xD1,modregrmx(3,4,reg));  // SHL reg,1
            code_orflag(cdb.last(), CFpsw);
        }
        else if (sz <= REGSIZE)
        {
            cs.Iop = 0x81 ^ isbyte;
            cs.Irm |= modregrm(0,7,0);
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = 0;
            cdb.gen(&cs);             // CMP [idx],0
        }
        else if (!I16 && sz == REGSIZE + 2)      // if far pointer
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYint);
            cs.Iop = 0x0FB7;
            cs.Irm |= modregrm(0,reg,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);             // MOVZX reg,msw
            goto L4;
        }
        else if (sz <= 2 * REGSIZE)
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYint);
            cs.Iop = 0x8B;
            code_newreg(&cs,reg);
            getlvalue_msw(&cs);
            cdb.gen(&cs);             // MOV reg,msw
            if (I32)
            {   if (tym == TYdouble || tym == TYdouble_alias)
                    cdb.gen2(0xD1,modregrm(3,4,reg)); // SHL reg,1
            }
            else if (tym == TYfloat)
                cdb.gen2(0xD1,modregrm(3,4,reg));    // SHL reg,1
        L4:
            cs.Iop = 0x0B;
            getlvalue_lsw(&cs);
            cs.Iflags |= CFpsw;
            cdb.gen(&cs);                    // OR reg,lsw
        }
        else if (!I32 && sz == 8)
        {
            *pretregs |= DOUBLEREGS_16;     // fake it for now
            goto L1;
        }
        else
        {
            debug WRTYxx(tym);
            assert(0);
        }
    }
    else                                // else return result in reg
    {
    L1:
        retregs = *pretregs;
        if (sz == 8 &&
            (retregs & (mPSW | mSTACK | ALLREGS | mBP)) == mSTACK)
        {   int i;

            // Optimizer should not CSE these, as the result is worse code!
            assert(!e.Ecount);

            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += 8 - REGSIZE;
            stackchanged = 1;
            i = 8 - REGSIZE;
            do
            {
                cdb.gen(&cs);                         // PUSH EA+i
                cdb.genadjesp(REGSIZE);
                cs.IEV1.Voffset -= REGSIZE;
                stackpush += REGSIZE;
                i -= REGSIZE;
            }
            while (i >= 0);
            goto L3;
        }
        if (I16 && sz == 8)
            retregs = DOUBLEREGS_16;

        // Watch out for loading an lptr from an lptr! We must have
        // the offset loaded into a different register.
        /*if (retregs & mES && (cs.Iflags & CFSEG) == CFes)
                retregs = ALLREGS;*/

        {
            assert(!isbyte || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,tym); // alloc registers
        }
        if (retregs & XMMREGS)
        {
            assert(sz == 4 || sz == 8 || sz == 16 || sz == 32); // float, double or vector
            cs.Iop = xmmload(tym);
            cs.Irex &= ~REX_W;
            code_newreg(&cs,reg - XMM0);
            checkSetVex(&cs,tym);
            cdb.gen(&cs);     // MOV reg,[idx]
        }
        else if (sz <= REGSIZE)
        {
            cs.Iop = 0x8B;                                  // MOV
            if (sz <= 2 && !I16 &&
                config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed)
            {
                cs.Iop = tyuns(tym) ? 0x0FB7 : 0x0FBF;      // MOVZX/MOVSX
                cs.Iflags &= ~CFopsize;
            }
            cs.Iop ^= isbyte;
        L2:
            code_newreg(&cs,reg);
            cdb.gen(&cs);     // MOV reg,[idx]
            if (isbyte && reg >= 4)
                code_orrex(cdb.last(), REX);
        }
        else if ((tym == TYfptr || tym == TYhptr) && retregs & mES)
        {
            cs.Iop = 0xC4;          // LES reg,[idx]
            goto L2;
        }
        else if (sz <= 2 * REGSIZE)
        {   uint lsreg;

            cs.Iop = 0x8B;
            // Be careful not to interfere with index registers
            if (!I16)
            {
                // Can't handle if both result registers are used in
                // the addressing mode.
                if ((retregs & idxregs) == retregs)
                {
                    retregs = mMSW & allregs & ~idxregs;
                    if (!retregs)
                        retregs |= mCX;
                    retregs |= mLSW & ~idxregs;

                    // We can run out of registers, so if that's possible,
                    // give us *one* of the idxregs
                    if ((retregs & ~regcon.mvar & mLSW) == 0)
                    {
                        regm_t x = idxregs & mLSW;
                        if (x)
                            retregs |= mask(findreg(x));        // give us one idxreg
                    }
                    else if ((retregs & ~regcon.mvar & mMSW) == 0)
                    {
                        regm_t x = idxregs & mMSW;
                        if (x)
                            retregs |= mask(findreg(x));        // give us one idxreg
                    }

                    allocreg(cdb,&retregs,&reg,tym);     // alloc registers
                    assert((retregs & idxregs) != retregs);
                }

                lsreg = findreglsw(retregs);
                if (mask(reg) & idxregs)            // reg is in addr mode
                {
                    code_newreg(&cs,lsreg);
                    cdb.gen(&cs);                 // MOV lsreg,lsw
                    if (sz == REGSIZE + 2)
                        cs.Iflags |= CFopsize;
                    lsreg = reg;
                    getlvalue_msw(&cs);         // MOV reg,msw
                }
                else
                {
                    code_newreg(&cs,reg);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);                 // MOV reg,msw
                    if (sz == REGSIZE + 2)
                        cdb.last().Iflags |= CFopsize;
                    getlvalue_lsw(&cs);         // MOV lsreg,lsw
                }
                NEWREG(cs.Irm,lsreg);
                cdb.gen(&cs);
            }
            else
            {
                // Index registers are always the lsw!
                cs.Irm |= modregrm(0,reg,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);     // MOV reg,msw
                lsreg = findreglsw(retregs);
                NEWREG(cs.Irm,lsreg);
                getlvalue_lsw(&cs);     // MOV lsreg,lsw
                cdb.gen(&cs);
            }
        }
        else if (I16 && sz == 8)
        {
            assert(reg == AX);
            cs.Iop = 0x8B;
            cs.IEV1.Voffset += 6;
            cdb.gen(&cs);             // MOV AX,EA+6
            cs.Irm |= modregrm(0,CX,0);
            cs.IEV1.Voffset -= 4;
            cdb.gen(&cs);                    // MOV CX,EA+2
            NEWREG(cs.Irm,DX);
            cs.IEV1.Voffset -= 2;
            cdb.gen(&cs);                    // MOV DX,EA
            cs.IEV1.Voffset += 4;
            NEWREG(cs.Irm,BX);
            cdb.gen(&cs);                    // MOV BX,EA+4
        }
        else
            assert(0);
    L3:
        fixresult(cdb,e,retregs,pretregs);
    }
    //fprintf(stderr,"cdafter :\n"); WRcodlst(c);
}



static if (!TARGET_SEGMENTED)
{
// Non-segmented targets never need ES loaded: no code is generated.
private code *cod2_setES(tym_t ty) { return null; }
}
else
{
/********************************
 * Generate code to load ES with the right segment value for a pointer
 * of type ty. Nothing is generated for pointer types that hit the
 * default case (e.g. far pointers).
 * Params:
 *      ty = type of the pointer whose segment ES must match
 * Returns:
 *      the generated code list (cdb.finish() of a local builder)
 */

private code *cod2_setES(tym_t ty)
{
    int push;

    CodeBuilder cdb;
    cdb.ctor();
    switch (tybasic(ty))
    {
        case TYnptr:
            if (!(config.flags3 & CFG3eseqds))
            {   push = 0x1E;            // PUSH DS
                goto L1;
            }
            break;
        case TYcptr:
            push = 0x0E;                // PUSH CS
            goto L1;
        case TYsptr:
            if ((config.wflags & WFssneds) || !(config.flags3 & CFG3eseqds))
            {   push = 0x16;            // PUSH SS
            L1:
                // Must load ES
                getregs(cdb,mES);
                cdb.gen1(push);
                cdb.gen1(0x07);         // POP ES
            }
            break;

        default:
            break;
    }
    return cdb.finish();
}
}

/********************************
 * Generate code for intrinsic strlen().
* Params:
*      cdb = code builder the generated instructions are appended to
*      e = the strlen elem; e.EV.E1 is the string pointer operand
*      pretregs = mask of registers/flags wanted for the result
*/

void cdstrlen(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Emitted sequence, leaving the length in CX:
        LES     DI,e1
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX
        DEC     CX
     */

    elem *str = e.EV.E1;
    tym_t strTy = str.Ety;

    // Pointer goes into DI, plus ES when tyreg() is false for its type
    regm_t ptrRegs = tyreg(strTy) ? mDI : (mDI | mES);
    codelem(cdb,str,&ptrRegs,false);

    // ES must hold the segment the string lives in
    cdb.append(cod2_setES(strTy));

    ubyte rexW = I64 ? REX_W : 0;

    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                                // MOV AL,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mDI|mCX);
    cdb.gen1(0xF2);                                         // REPNE
    cdb.gen1(0xAE);                                         // SCASB
    genregs(cdb,0xF7,2,CX);                                 // NOT CX
    code_orrex(cdb.last(), rexW);
    if (!I64)
        cdb.gen1(0x48 + CX);                                // DEC CX
    else
        cdb.gen2(0xFF,(rexW << 16) | modregrm(3,1,CX));     // DEC reg

    // The final DEC can supply the flags, so mPSW needs no separate test
    const bool wantFlags = (*pretregs & mPSW) != 0;
    if (wantFlags)
    {
        cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;
    }
    fixresult(cdb,e,mCX,pretregs);
}


/*********************************
 * Generate code for strcmp(s1,s2) intrinsic.
* Params:
*      cdb = code builder the generated instructions are appended to
*      e = the strcmp elem; e.EV.E1 is s1, e.EV.E2 is s2
*      pretregs = mask of registers/flags wanted for the result
*/

void cdstrcmp(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Emitted sequence:
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     CX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        PUSH    DS
        MOV     DS,CX
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX                      ;CX = string length of s2
        SUB     DI,CX                   ;point DI back to beginning
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    bool restoreDS;     // true when DS was pushed and must be popped afterwards
    int seg;

    // s1 into SI (and CX when tyreg() is false for its type)
    tym_t s1Ty = e.EV.E1.Ety;
    regm_t s1Regs = tyreg(s1Ty) ? mSI : (mSI | mCX);
    codelem(cdb,e.EV.E1,&s1Regs,false);

    // s2 into DI (and ES when tyreg() is false for its type)
    tym_t s2Ty = e.EV.E2.Ety;
    regm_t s2Regs = tyreg(s2Ty) ? mDI : (mDI | mES);
    scodelem(cdb,e.EV.E2,&s2Regs,s1Regs,false);

    // ES must hold the segment of s2
    cdb.append(cod2_setES(s2Ty));
    getregs_imm(cdb,mAX | mCX);

    ubyte rexW = I64 ? REX_W : 0;

    // Point DS at the segment of s1
    switch (tybasic(s1Ty))
    {
        case TYnptr:
        case TYimmutPtr:
            restoreDS = false;
            break;

        case TYsptr:
            // if sptr can't use DS segment, go through SS
            seg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            goto Lpushseg;
        case TYcptr:
            seg = SEG_CS;
        Lpushseg:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen1(0x06 + (seg << 3));            // PUSH seg
            cdb.gen1(0x1F);                         // POP DS
            restoreDS = true;
            break;
        case TYfptr:
        case TYvptr:
        case TYhptr:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
            restoreDS = true;
            break;
        default:
            assert(0);
    }

    movregconst(cdb,AX,0,0);                                // MOV AX,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mSI|mDI|mCX);
    cdb.gen1(0xF2);                                         // REPNE
    cdb.gen1(0xAE);                                         // SCASB
    genregs(cdb,0xF7,2,CX);                                 // NOT CX
    code_orrex(cdb.last(),rexW);
    genregs(cdb,0x2B,DI,CX);                                // SUB DI,CX
    code_orrex(cdb.last(),rexW);
    cdb.gen1(0xF3);                                         // REPE
    cdb.gen1(0xA6);                                         // CMPSB
    if (restoreDS)
        cdb.gen1(0x1F);                                     // POP DS

    code *done = gennop(null);          // join point, target of the JE
    const bool flagsOnly = (*pretregs == mPSW);
    if (!flagsOnly)
    {
        genjmp(cdb,JE,FLcode,cast(block *) done);           // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                            // SBB AX,AX
        code_orrex(cdb.last(),rexW);
        cdb.genc2(0x81,(rexW << 16) | modregrm(3,3,AX),cast(targ_uns)-1);   // SBB AX,-1
    }

    *pretregs &= ~mPSW;
    cdb.append(done);
    fixresult(cdb,e,mAX,pretregs);
}

/*********************************
 * Generate code for memcmp(s1,s2,n) intrinsic.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the memcmp elem; e.EV.E1 is an OPparam holding s1 (E1) and
 *          s2 (E2), e.EV.E2 is the byte count n
 *      pretregs = mask of registers/flags wanted for the result
 */

void cdmemcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Emitted sequence:
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     DX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        MOV     CX,n                    ;get number of bytes to compare
        PUSH    DS
        MOV     DS,DX
        XOR     AX,AX
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
    */

    bool restoreDS;     // true when DS was pushed and must be popped afterwards
    int seg;

    elem *params = e.EV.E1;
    assert(params.Eoper == OPparam);

    // s1 into SI (and DX when tyreg() is false for its type)
    tym_t s1Ty = params.EV.E1.Ety;
    regm_t s1Regs = tyreg(s1Ty) ? mSI : (mSI | mDX);
    codelem(cdb,params.EV.E1,&s1Regs,false);

    // s2 into DI (and ES when tyreg() is false for its type)
    tym_t s2Ty = params.EV.E2.Ety;
    regm_t s2Regs = tyreg(s2Ty) ? mDI : (mDI | mES);
    scodelem(cdb,params.EV.E2,&s2Regs,s1Regs,false);
    freenode(params);

    // byte count into CX
    regm_t countRegs = mCX;
    scodelem(cdb,e.EV.E2,&countRegs,s2Regs | s1Regs,false);

    // ES must hold the segment of s2
    cdb.append(cod2_setES(s2Ty));

    // Point DS at the segment of s1
    switch (tybasic(s1Ty))
    {
        case TYnptr:
        case TYimmutPtr:
            restoreDS = false;
            break;

        case TYsptr:
            // if sptr can't use DS segment, go through SS
            seg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            goto Lpushseg;
        case TYcptr:
            seg = SEG_CS;
        Lpushseg:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen1(0x06 + (seg << 3));            // PUSH seg
            cdb.gen1(0x1F);                         // POP DS
            restoreDS = true;
            break;
        case TYfptr:
        case TYvptr:
        case TYhptr:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));   // MOV DS,DX
            restoreDS = true;
            break;
        default:
            assert(0);
    }

static if (1)
{
    getregs(cdb,mAX);
    cdb.gen2(0x33,modregrm(3,AX,AX));           // XOR AX,AX
    code_orflag(cdb.last(), CFpsw);             // keep flags
}
else
{
    if (*pretregs != mPSW)                      // if not flags only
        regwithvalue(cdb,mAX,0,null,0);         // put 0 in AX
}

    getregs(cdb,mCX | mSI | mDI);
    cdb.gen1(0xF3);                             // REPE
    cdb.gen1(0xA6);                             // CMPSB
    if (restoreDS)
        cdb.gen1(0x1F);                         // POP DS

    const bool flagsOnly = (*pretregs == mPSW);
    if (!flagsOnly)
    {
        code *done = gennop(null);              // join point, target of the JE
        genjmp(cdb,JE,FLcode,cast(block *) done);           // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                            // SBB AX,AX
        cdb.genc2(0x81,modregrm(3,3,AX),cast(targ_uns)-1);  // SBB AX,-1
        cdb.append(done);
    }

    *pretregs &= ~mPSW;
    fixresult(cdb,e,mAX,pretregs);
}

/*********************************
 * Generate code for strcpy(s1,s2) intrinsic.
3499 */ 3500 3501 void cdstrcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3502 { 3503 char need_DS; 3504 int segreg; 3505 3506 /* 3507 LES DI,s2 ;ES:DI = s2 3508 CLR AX ;scan for 0 3509 MOV CX,-1 ;largest possible string 3510 REPNE SCASB ;find end of s2 3511 NOT CX ;CX = strlen(s2) + 1 (for EOS) 3512 SUB DI,CX 3513 MOV SI,DI 3514 PUSH DS 3515 PUSH ES 3516 LES DI,s1 3517 POP DS 3518 MOV AX,DI ;return value is s1 3519 REP MOVSB 3520 POP DS 3521 */ 3522 3523 stackchanged = 1; 3524 regm_t retregs = mDI; 3525 tym_t ty2 = tybasic(e.EV.E2.Ety); 3526 if (!tyreg(ty2)) 3527 retregs |= mES; 3528 ubyte rex = I64 ? REX_W : 0; 3529 codelem(cdb,e.EV.E2,&retregs,false); 3530 3531 // Make sure ES contains proper segment value 3532 cdb.append(cod2_setES(ty2)); 3533 getregs_imm(cdb,mAX | mCX); 3534 movregconst(cdb,AX,0,1); // MOV AL,0 3535 movregconst(cdb,CX,-1,I64?64:0); // MOV CX,-1 3536 getregs(cdb,mAX|mCX|mSI|mDI); 3537 cdb.gen1(0xF2); // REPNE 3538 cdb.gen1(0xAE); // SCASB 3539 genregs(cdb,0xF7,2,CX); // NOT CX 3540 code_orrex(cdb.last(),rex); 3541 genregs(cdb,0x2B,DI,CX); // SUB DI,CX 3542 code_orrex(cdb.last(),rex); 3543 genmovreg(cdb,SI,DI); // MOV SI,DI 3544 3545 // Load DS with right value 3546 switch (ty2) 3547 { 3548 case TYnptr: 3549 case TYimmutPtr: 3550 need_DS = false; 3551 break; 3552 3553 case TYsptr: 3554 if (config.wflags & WFssneds) // if sptr can't use DS segment 3555 segreg = SEG_SS; 3556 else 3557 segreg = SEG_DS; 3558 goto L1; 3559 case TYcptr: 3560 segreg = SEG_CS; 3561 L1: 3562 cdb.gen1(0x1E); // PUSH DS 3563 cdb.gen1(0x06 + (segreg << 3)); // PUSH segreg 3564 cdb.genadjesp(REGSIZE * 2); 3565 need_DS = true; 3566 break; 3567 case TYfptr: 3568 case TYvptr: 3569 case TYhptr: 3570 segreg = SEG_ES; 3571 goto L1; 3572 3573 default: 3574 assert(0); 3575 } 3576 3577 retregs = mDI; 3578 tym_t ty1 = tybasic(e.EV.E1.Ety); 3579 if (!tyreg(ty1)) 3580 retregs |= mES; 3581 scodelem(cdb,e.EV.E1,&retregs,mCX|mSI,false); 3582 getregs(cdb,mAX|mCX|mSI|mDI); 3583 3584 // 
Make sure ES contains proper segment value 3585 if (ty2 != TYnptr || ty1 != ty2) 3586 cdb.append(cod2_setES(ty1)); 3587 else 3588 {} // ES is already same as DS 3589 3590 if (need_DS) 3591 cdb.gen1(0x1F); // POP DS 3592 if (*pretregs) 3593 genmovreg(cdb,AX,DI); // MOV AX,DI 3594 cdb.gen1(0xF3); // REP 3595 cdb.gen1(0xA4); // MOVSB 3596 3597 if (need_DS) 3598 { cdb.gen1(0x1F); // POP DS 3599 cdb.genadjesp(-(REGSIZE * 2)); 3600 } 3601 fixresult(cdb,e,mAX | mES,pretregs); 3602 } 3603 3604 /********************************* 3605 * Generate code for memcpy(s1,s2,n) intrinsic. 3606 * OPmemcpy 3607 * / \ 3608 * s1 OPparam 3609 * / \ 3610 * s2 n 3611 */ 3612 3613 void cdmemcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3614 { 3615 char need_DS; 3616 int segreg; 3617 3618 /* 3619 MOV SI,s2 3620 MOV DX,s2+2 3621 MOV CX,n 3622 LES DI,s1 3623 PUSH DS 3624 MOV DS,DX 3625 MOV AX,DI ;return value is s1 3626 REP MOVSB 3627 POP DS 3628 */ 3629 3630 elem *e2 = e.EV.E2; 3631 assert(e2.Eoper == OPparam); 3632 3633 // Get s2 into DX:SI 3634 regm_t retregs2 = mSI; 3635 tym_t ty2 = e2.EV.E1.Ety; 3636 if (!tyreg(ty2)) 3637 retregs2 |= mDX; 3638 codelem(cdb,e2.EV.E1,&retregs2,false); 3639 3640 // Get nbytes into CX 3641 regm_t retregs3 = mCX; 3642 scodelem(cdb,e2.EV.E2,&retregs3,retregs2,false); 3643 freenode(e2); 3644 3645 // Get s1 into ES:DI 3646 regm_t retregs1 = mDI; 3647 tym_t ty1 = e.EV.E1.Ety; 3648 if (!tyreg(ty1)) 3649 retregs1 |= mES; 3650 scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false); 3651 3652 ubyte rex = I64 ? 
REX_W : 0; 3653 3654 // Make sure ES contains proper segment value 3655 cdb.append(cod2_setES(ty1)); 3656 3657 // Load DS with right value 3658 switch (tybasic(ty2)) 3659 { 3660 case TYnptr: 3661 case TYimmutPtr: 3662 need_DS = false; 3663 break; 3664 3665 case TYsptr: 3666 if (config.wflags & WFssneds) // if sptr can't use DS segment 3667 segreg = SEG_SS; 3668 else 3669 segreg = SEG_DS; 3670 goto L1; 3671 3672 case TYcptr: 3673 segreg = SEG_CS; 3674 L1: 3675 cdb.gen1(0x1E); // PUSH DS 3676 cdb.gen1(0x06 + (segreg << 3)); // PUSH segreg 3677 cdb.gen1(0x1F); // POP DS 3678 need_DS = true; 3679 break; 3680 3681 case TYfptr: 3682 case TYvptr: 3683 case TYhptr: 3684 cdb.gen1(0x1E); // PUSH DS 3685 cdb.gen2(0x8E,modregrm(3,SEG_DS,DX)); // MOV DS,DX 3686 need_DS = true; 3687 break; 3688 3689 default: 3690 assert(0); 3691 } 3692 3693 if (*pretregs) // if need return value 3694 { getregs(cdb,mAX); 3695 genmovreg(cdb,AX,DI); 3696 } 3697 3698 if (0 && I32 && config.flags4 & CFG4speed) 3699 { 3700 /* This is only faster if the memory is dword aligned, if not 3701 * it is significantly slower than just a rep movsb. 
3702 */ 3703 /* mov EDX,ECX 3704 * shr ECX,2 3705 * jz L1 3706 * repe movsd 3707 * L1: nop 3708 * and EDX,3 3709 * jz L2 3710 * mov ECX,EDX 3711 * repe movsb 3712 * L2: nop 3713 */ 3714 getregs(cdb,mSI | mDI | mCX | mDX); 3715 genmovreg(cdb,DX,CX); // MOV EDX,ECX 3716 cdb.genc2(0xC1,modregrm(3,5,CX),2); // SHR ECX,2 3717 code *cx = gennop(null); 3718 genjmp(cdb, JE, FLcode, cast(block *)cx); // JZ L1 3719 cdb.gen1(0xF3); // REPE 3720 cdb.gen1(0xA5); // MOVSW 3721 cdb.append(cx); 3722 cdb.genc2(0x81, modregrm(3,4,DX),3); // AND EDX,3 3723 3724 code *cnop = gennop(null); 3725 genjmp(cdb, JE, FLcode, cast(block *)cnop); // JZ L2 3726 genmovreg(cdb,CX,DX); // MOV ECX,EDX 3727 cdb.gen1(0xF3); // REPE 3728 cdb.gen1(0xA4); // MOVSB 3729 cdb.append(cnop); 3730 } 3731 else 3732 { 3733 getregs(cdb,mSI | mDI | mCX); 3734 if (!I32 && config.flags4 & CFG4speed) // if speed optimization 3735 { cdb.gen2(0xD1,(rex << 16) | modregrm(3,5,CX)); // SHR CX,1 3736 cdb.gen1(0xF3); // REPE 3737 cdb.gen1(0xA5); // MOVSW 3738 cdb.gen2(0x11,(rex << 16) | modregrm(3,CX,CX)); // ADC CX,CX 3739 } 3740 cdb.gen1(0xF3); // REPE 3741 cdb.gen1(0xA4); // MOVSB 3742 if (need_DS) 3743 cdb.gen1(0x1F); // POP DS 3744 } 3745 fixresult(cdb,e,mES|mAX,pretregs); 3746 } 3747 3748 3749 /********************************* 3750 * Generate code for memset(s,val,n) intrinsic. 3751 * (s OPmemset (n OPparam val)) 3752 */ 3753 3754 void cdmemset(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3755 { 3756 regm_t retregs1; 3757 regm_t retregs2; 3758 regm_t retregs3; 3759 reg_t reg; 3760 reg_t vreg; 3761 tym_t ty1; 3762 int segreg; 3763 uint remainder; 3764 targ_uns numbytes,numwords; 3765 targ_size_t value; 3766 uint m; 3767 3768 //printf("cdmemset(*pretregs = %s)\n", regm_str(*pretregs)); 3769 elem *e2 = e.EV.E2; 3770 assert(e2.Eoper == OPparam); 3771 3772 ubyte rex = I64 ? 
REX_W : 0; 3773 3774 bool e2E2isConst = false; 3775 if (e2.EV.E2.Eoper == OPconst) 3776 { 3777 value = cast(targ_size_t)el_tolong(e2.EV.E2); 3778 value &= 0xFF; 3779 value |= value << 8; 3780 value |= value << 16; 3781 static if (value.sizeof > 4) 3782 value |= value << 32; 3783 e2E2isConst = true; 3784 } 3785 else if (e2.EV.E2.Eoper == OPstrpar) // happens if e2.EV.E2 is a struct of 0 size 3786 { 3787 value = 0; 3788 e2E2isConst = true; 3789 } 3790 else 3791 value = 0xDEADBEEF; // stop annoying false positives that value is not inited 3792 3793 if (e2.EV.E1.Eoper == OPconst) 3794 { 3795 static uint REP_THRESHOLD() { return REGSIZE * (6 + (REGSIZE == 4)); } 3796 numbytes = cast(uint)cast(targ_size_t)el_tolong(e2.EV.E1); 3797 if (numbytes <= REP_THRESHOLD && 3798 !I16 && // doesn't work for 16 bits 3799 e2E2isConst) 3800 { 3801 targ_uns offset = 0; 3802 retregs1 = *pretregs; 3803 if (!retregs1) 3804 retregs1 = ALLREGS; 3805 codelem(cdb,e.EV.E1,&retregs1,false); 3806 reg = findreg(retregs1); 3807 if (e2.EV.E2.Eoper == OPconst) 3808 { 3809 const uint mrm = buildModregrm(0,0,reg); 3810 switch (numbytes) 3811 { 3812 case 4: // MOV [reg],imm32 3813 cdb.genc2(0xC7,mrm,value); 3814 goto fixres; 3815 case 2: // MOV [reg],imm16 3816 cdb.genc2(0xC7,mrm,value); 3817 cdb.last().Iflags = CFopsize; 3818 goto fixres; 3819 case 1: // MOV [reg],imm8 3820 cdb.genc2(0xC6,mrm,value); 3821 goto fixres; 3822 3823 default: 3824 break; 3825 } 3826 } 3827 3828 regwithvalue(cdb, BYTEREGS & ~retregs1, value, &vreg, I64 ? 
64 : 0); 3829 freenode(e2.EV.E2); 3830 freenode(e2); 3831 3832 m = (rex << 16) | buildModregrm(2,vreg,reg); 3833 while (numbytes >= REGSIZE) 3834 { // MOV dword ptr offset[reg],vreg 3835 cdb.gen2(0x89,m); 3836 cdb.last().IEV1.Voffset = offset; 3837 cdb.last().IFL1 = FLconst; 3838 numbytes -= REGSIZE; 3839 offset += REGSIZE; 3840 } 3841 m &= ~(rex << 16); 3842 if (numbytes & 4) 3843 { // MOV dword ptr offset[reg],vreg 3844 cdb.gen2(0x89,m); 3845 cdb.last().IEV1.Voffset = offset; 3846 cdb.last().IFL1 = FLconst; 3847 offset += 4; 3848 } 3849 if (numbytes & 2) 3850 { // MOV word ptr offset[reg],vreg 3851 cdb.gen2(0x89,m); 3852 cdb.last().IEV1.Voffset = offset; 3853 cdb.last().IFL1 = FLconst; 3854 cdb.last().Iflags = CFopsize; 3855 offset += 2; 3856 } 3857 if (numbytes & 1) 3858 { // MOV byte ptr offset[reg],vreg 3859 cdb.gen2(0x88,m); 3860 cdb.last().IEV1.Voffset = offset; 3861 cdb.last().IFL1 = FLconst; 3862 if (I64 && vreg >= 4) 3863 cdb.last().Irex |= REX; 3864 } 3865 fixres: 3866 fixresult(cdb,e,retregs1,pretregs); 3867 return; 3868 } 3869 } 3870 3871 opcode_t op; 3872 // Get nbytes into CX 3873 retregs2 = mCX; 3874 if (!I16 && e2.EV.E1.Eoper == OPconst && e2E2isConst) 3875 { 3876 remainder = numbytes & (4 - 1); 3877 numwords = numbytes / 4; // number of words 3878 op = 0xAB; // moving by words 3879 getregs(cdb,mCX); 3880 movregconst(cdb,CX,numwords,I64?64:0); // # of bytes/words 3881 } 3882 else 3883 { 3884 remainder = 0; 3885 op = 0xAA; // must move by bytes 3886 codelem(cdb,e2.EV.E1,&retregs2,false); 3887 } 3888 3889 // Get val into AX 3890 3891 retregs3 = mAX; 3892 if (!I16 && e2E2isConst) 3893 { 3894 regwithvalue(cdb, mAX, value, null, I64?64:0); 3895 freenode(e2.EV.E2); 3896 } 3897 else 3898 { 3899 scodelem(cdb,e2.EV.E2,&retregs3,retregs2,false); 3900 3901 if (0 && I32) 3902 { 3903 cdb.gen2(0x8A,modregrm(3,AH,AL)); // MOV AH,AL 3904 cdb.genc2(0xC1,modregrm(3,4,AX),8); // SHL EAX,8 3905 cdb.gen2(0x8A,modregrm(3,AL,AH)); // MOV AL,AH 3906 
cdb.genc2(0xC1,modregrm(3,4,AX),8);     // SHL EAX,8
            cdb.gen2(0x8A,modregrm(3,AL,AH));       // MOV AL,AH
        }
    }
    freenode(e2);

    // Get s into ES:DI
    retregs1 = mDI;
    ty1 = e.EV.E1.Ety;
    if (!tyreg(ty1))
        retregs1 |= mES;
    scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);
    reg = DI; //findreg(retregs1);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty1));

    if (*pretregs)                        // if need return value
    {
        getregs(cdb,mBX);
        genmovreg(cdb,BX,DI);
    }

    getregs(cdb,mDI | mCX);
    if (I16 && config.flags4 & CFG4speed)      // if speed optimization
    {
        getregs(cdb,mAX);
        cdb.gen2(0x8A,modregrm(3,AH,AL));   // MOV AH,AL
        cdb.gen2(0xD1,modregrm(3,5,CX));    // SHR CX,1
        cdb.gen1(0xF3);                     // REP
        cdb.gen1(0xAB);                     // STOSW
        cdb.gen2(0x11,modregrm(3,CX,CX));   // ADC CX,CX
        op = 0xAA;
    }

    cdb.gen1(0xF3);                         // REP
    cdb.gen1(op);                           // STOSD
    m = buildModregrm(2,AX,reg);
    if (remainder & 4)
    {
        cdb.gen2(0x89,m);
        cdb.last().IFL1 = FLconst;
    }
    if (remainder & 2)
    {
        cdb.gen2(0x89,m);
        cdb.last().Iflags = CFopsize;
        cdb.last().IEV1.Voffset = remainder & 4;
        cdb.last().IFL1 = FLconst;
    }
    if (remainder & 1)
    {
        cdb.gen2(0x88,m);
        cdb.last().IEV1.Voffset = remainder & ~1;
        cdb.last().IFL1 = FLconst;
    }
    regimmed_set(CX,0);
    fixresult(cdb,e,mES|mBX,pretregs);
}


/**********************
 * Do structure assignments.
 * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2).
 * Mebbe call cdstreq() for double assignments???
 *
 * The source address is loaded into SI (plus CX for the segment when DS
 * has to be switched), the destination into [ES:]DI, and the copy is done
 * with MOVS instructions: unrolled for small sizes, REP MOVS otherwise.
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = assignment elem; e.EV.E1 is the destination, e.EV.E2 the source
 *      pretregs = requested result registers; must not include mPSW
 */

void cdstreq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    char need_DS = false;
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    int segreg;
    uint numbytes = cast(uint)type_size(e.ET);          // # of bytes in structure/union
    ubyte rex = I64 ? REX_W : 0;

    //printf("cdstreq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));

    // First, load pointer to rvalue into SI
    regm_t srcregs = mSI;                       // source is DS:SI
    docommas(cdb,&e2);
    if (e2.Eoper == OPind)              // if (.. = *p)
    {   elem *e21 = e2.EV.E1;

        segreg = SEG_DS;
        switch (tybasic(e21.Ety))
        {
            case TYsptr:
                if (config.wflags & WFssneds)   // if sptr can't use DS segment
                    segreg = SEG_SS;
                break;
            case TYcptr:
                if (!(config.exe & EX_flat))
                    segreg = SEG_CS;
                break;
            case TYfptr:
            case TYvptr:
            case TYhptr:
                srcregs |= mCX;         // get segment also
                need_DS = true;
                break;

            default:
                break;
        }
        codelem(cdb,e21,&srcregs,false);
        freenode(e2);
        if (segreg != SEG_DS)           // if not DS
        {
            getregs(cdb,mCX);
            cdb.gen2(0x8C,modregrm(3,segreg,CX)); // MOV CX,segreg
            need_DS = true;
        }
    }
    else if (e2.Eoper == OPvar)
    {
        if (e2.EV.Vsym.ty() & mTYfar) // if e2 is in a far segment
        {   srcregs |= mCX;             // get segment also
            need_DS = true;
            cdrelconst(cdb,e2,&srcregs);
        }
        else
        {
            segreg = segfl[el_fl(e2)];
            if ((config.wflags & WFssneds) && segreg == SEG_SS || // if source is on stack
                segreg == SEG_CS)               // if source is in CS
            {
                need_DS = true;         // we need to reload DS
                // Load CX with segment
                srcregs |= mCX;
                getregs(cdb,mCX);
                cdb.gen2(0x8C,                // MOV CX,[SS|CS]
                    modregrm(3,segreg,CX));
            }
            cdrelconst(cdb,e2,&srcregs);
        }
        freenode(e2);
    }
    else
    {
        if (!(config.exe & EX_flat))
        {   need_DS = true;
            srcregs |= mCX;
        }
        codelem(cdb,e2,&srcregs,false);
    }

    // now get pointer to lvalue (destination) in ES:DI
    regm_t dstregs = (config.exe & EX_flat) ? mDI : mES|mDI;
    if (e1.Eoper == OPind)               // if (*p = ..)
    {
        if (tyreg(e1.EV.E1.Ety))
            dstregs = mDI;
        cdb.append(cod2_setES(e1.EV.E1.Ety));
        scodelem(cdb,e1.EV.E1,&dstregs,srcregs,false);
    }
    else
        cdrelconst(cdb,e1,&dstregs);
    freenode(e1);

    getregs(cdb,(srcregs | dstregs) & (mLSW | mDI));
    if (need_DS)
    {   assert(!(config.exe & EX_flat));
        cdb.gen1(0x1E);                         // PUSH DS
        cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
    }
    if (numbytes <= REGSIZE * (6 + (REGSIZE == 4)))
    {
        // Small copy: one MOVS per register-sized word, then byte moves
        while (numbytes >= REGSIZE)
        {
            cdb.gen1(0xA5);         // MOVSW
            code_orrex(cdb.last(), rex);
            numbytes -= REGSIZE;
        }
        //if (numbytes)
        //    printf("cdstreq numbytes %d\n",numbytes);
        while (numbytes--)
            cdb.gen1(0xA4);         // MOVSB
    }
    else
    {
static if (1)
{
        uint remainder = numbytes & (REGSIZE - 1);
        numbytes /= REGSIZE;            // number of words
        getregs_imm(cdb,mCX);
        movregconst(cdb,CX,numbytes,0);   // # of bytes/words
        cdb.gen1(0xF3);                 // REP
        if (REGSIZE == 8)
            cdb.gen1(REX | REX_W);
        cdb.gen1(0xA5);                 // REP MOVSD
        regimmed_set(CX,0);             // note that CX == 0
        for (; remainder; remainder--)
        {
            cdb.gen1(0xA4);             // MOVSB
        }
}
else
{
        uint movs;
        if (numbytes & (REGSIZE - 1))   // if odd
            movs = 0xA4;                // MOVSB
        else
        {
            movs = 0xA5;                // MOVSW
            numbytes /= REGSIZE;        // # of words
        }
        getregs_imm(cdb,mCX);
        movregconst(cdb,CX,numbytes,0);   // # of bytes/words
        cdb.gen1(0xF3);                 // REP
        cdb.gen1(movs);
        regimmed_set(CX,0);             // note that CX == 0
}
    }
    if (need_DS)
        cdb.gen1(0x1F);                 // POP DS
    assert(!(*pretregs & mPSW));
    if (*pretregs)
    {   // ES:DI points past what we want

        cdb.genc2(0x81,(rex << 16) | modregrm(3,5,DI), type_size(e.ET));   // SUB DI,numbytes
        regm_t retregs = mDI;
        if (*pretregs & mMSW && !(config.exe & EX_flat))
            retregs |= mES;
        fixresult(cdb,e,retregs,pretregs);
    }
}


/**********************
 * Get the address of.
 * Is also called by cdstreq() to set up pointer to a structure.
 *
 * If only the flags are requested, a test of SP is generated instead
 * (an address is never 0) and nothing else is loaded.
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = elem whose address is wanted
 *      pretregs = requested result registers; mPSW is cleared from it
 */

void cdrelconst(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdrelconst(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));

    /* The following should not happen, but cgelem.c is a little stupid.
     * Assertion can be tripped by func("string" == 0); and similar
     * things. Need to add goals to optelem() to fix this completely.
     */
    //assert((*pretregs & mPSW) == 0);
    if (*pretregs & mPSW)
    {
        *pretregs &= ~mPSW;
        gentstreg(cdb,SP);            // SP is never 0
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
    if (!*pretregs)
        return;

    assert(e);
    tym_t tym = tybasic(e.Ety);
    switch (tym)
    {
        case TYstruct:
        case TYarray:
        case TYldouble:
        case TYildouble:
        case TYcldouble:
            tym = TYnptr;               // don't confuse allocreg()
            if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
            {
                tym = TYfptr;
            }
            break;

        case TYifunc:
            tym = TYfptr;
            break;

        default:
            if (tyfunc(tym))
                tym =
                    tyfarfunc(tym) ? TYfptr :
                    TYnptr;
            break;
    }
    //assert(tym & typtr);              // don't fail on (int)&a

    SC sclass;
    reg_t mreg,            // segment of the address (TYfptrs only)
          lreg;            // offset of the address

    allocreg(cdb,pretregs,&lreg,tym);
    if (_tysize[tym] > REGSIZE)            // fptr could've been cast to long
    {
        if (*pretregs & mES)
        {
            /* Do not allocate CX or SI here, as cdstreq() needs
             * them preserved. cdstreq() should use scodelem()
             */
            regm_t scratch = (mAX|mBX|mDX|mDI) & ~mask(lreg);
            allocreg(cdb,&scratch,&mreg,TYint);
        }
        else
        {
            mreg = lreg;
            lreg = findreglsw(*pretregs);
        }

        /* if (get segment of function that isn't necessarily in the
         * current segment (i.e. CS doesn't have the right value in it)
         */
        Symbol *s = e.EV.Vsym;
        if (s.Sfl == FLdatseg)
        {   assert(0);
        }
        sclass = cast(SC) s.Sclass;
        const ety = tybasic(s.ty());
        if ((tyfarfunc(ety) || ety == TYifunc) &&
            (sclass == SCextern || ClassInline(sclass) || config.wflags & WFthunk)
            || s.Sfl == FLfardata
            || (s.ty() & mTYcs && s.Sseg != cseg && (LARGECODE || s.Sclass == SCcomdat))
           )
        {   // MOV mreg,seg of symbol
            cdb.gencs(0xB8 + mreg,0,FLextern,s);
            cdb.last().Iflags = CFseg;
        }
        else
        {
            const fl = (s.ty() & mTYcs) ? FLcsdata : s.Sfl;
            cdb.gen2(0x8C,            // MOV mreg,SEG REGISTER
                modregrm(3,segfl[fl],mreg));
        }
        if (*pretregs & mES)
            cdb.gen2(0x8E,modregrm(3,0,mreg));        // MOV ES,mreg
    }
    getoffset(cdb,e,lreg);
}

/*********************************
 * Load the offset portion of the address represented by e into
 * reg.
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = must be an OPvar or OPrelconst elem
 *      reg = destination register; STACK means push the offset instead
 */

void getoffset(ref CodeBuilder cdb,elem *e,reg_t reg)
{
    //printf("getoffset(e = %p, reg = %d)\n", e, reg);
    code cs = void;
    cs.Iflags = 0;
    ubyte rex = 0;
    cs.Irex = rex;
    assert(e.Eoper == OPvar || e.Eoper == OPrelconst);
    auto fl = el_fl(e);
    switch (fl)
    {
        case FLdatseg:
            cs.IEV2.Vpointer = e.EV.Vpointer;
            goto L3;

        case FLfardata:
            goto L4;

        case FLtlsdata:
static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
{
        {
          L5:
            if (config.flags3 & CFG3pic)
            {
                if (I64)
                {
                    /* Generate:
                     *   LEA DI,s@TLSGD[RIP]
                     */
                    //assert(reg == DI);
                    code css = void;
                    css.Irex = REX | REX_W;
                    css.Iop = LEA;
                    css.Irm = modregrm(0,reg,5);
                    if (reg & 8)
                        css.Irex |= REX_R;
                    css.Iflags = CFopsize;
                    css.IFL1 = cast(ubyte)fl;
                    css.IEV1.Vsym = e.EV.Vsym;
                    css.IEV1.Voffset = e.EV.Voffset;
                    cdb.gen(&css);
                }
                else
                {
                    /* Generate:
                     *   LEA EAX,s@TLSGD[1*EBX+0]
                     */
                    assert(reg == AX);
                    load_localgot(cdb);
                    code css = void;
                    css.Iflags = 0;
                    css.Iop = LEA;             // LEA
                    css.Irex = 0;
                    css.Irm = modregrm(0,AX,4);
                    css.Isib = modregrm(0,BX,5);
                    css.IFL1 = cast(ubyte)fl;
                    css.IEV1.Vsym = e.EV.Vsym;
                    css.IEV1.Voffset = e.EV.Voffset;
                    cdb.gen(&css);
                }
                return;
            }
            /* Generate:
             *      MOV reg,GS:[00000000]
             *      ADD reg, offset s@TLS_LE
             * for locals, and for globals:
             *      MOV reg,GS:[00000000]
             *      ADD reg, s@TLS_IE
             * note different fixup
             */
            int stack = 0;
            if (reg == STACK)
            {   regm_t retregs = ALLREGS;

                reg_t regx;
                allocreg(cdb,&retregs,&regx,TYoffset);
                reg = findreg(retregs);
                stack = 1;
            }

            code css = void;
            css.Irex = rex;
            css.Iop = 0x8B;
            css.Irm = modregrm(0, 0, BPRM);
            code_newreg(&css, reg);
            css.Iflags = CFgs;
            css.IFL1 = FLconst;
            css.IEV1.Vuns = 0;
            cdb.gen(&css);               // MOV reg,GS:[00000000]

            if (e.EV.Vsym.Sclass == SCstatic || e.EV.Vsym.Sclass == SClocstat)
            {   // ADD reg, offset s
                cs.Irex = rex;
                cs.Iop = 0x81;
                cs.Irm = modregrm(3,0,reg & 7);
                if (reg & 8)
                    cs.Irex |= REX_B;
                cs.Iflags = CFoff;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e.EV.Vsym;
                cs.IEV2.Voffset = e.EV.Voffset;
            }
            else
            {   // ADD reg, s
                cs.Irex = rex;
                cs.Iop = 0x03;
                cs.Irm = modregrm(0,0,BPRM);
                code_newreg(&cs, reg);
                cs.Iflags = CFoff;
                cs.IFL1 = cast(ubyte)fl;
                cs.IEV1.Vsym = e.EV.Vsym;
                cs.IEV1.Voffset = e.EV.Voffset;
            }
            cdb.gen(&cs);                // ADD reg, xxxx

            if (stack)
            {
                cdb.gen1(0x50 + (reg & 7));      // PUSH reg
                if (reg & 8)
                    code_orrex(cdb.last(), REX_B);
                cdb.genadjesp(REGSIZE);
                stackchanged = 1;
            }
            break;
        }
}
else static if (TARGET_WINDOS)
{
            if (I64)
            {
              L5:
                assert(reg != STACK);
                cs.IEV2.Vsym = e.EV.Vsym;
                cs.IEV2.Voffset = e.EV.Voffset;
                cs.Iop = 0xB8 + (reg & 7);      // MOV Ereg,offset s
                if (reg & 8)
                    cs.Irex |= REX_B;
                cs.Iflags = CFoff;              // want offset only
                cs.IFL2 = cast(ubyte)fl;
                cdb.gen(&cs);
                break;
            }
            goto L4;
}
else
{
            goto L4;
}

        case FLfunc:
            fl = FLextern;                  /* don't want PC relative addresses */
            goto L4;

        case FLextern:
static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
{
            if (e.EV.Vsym.ty() & mTYthread)
                goto L5;
}
static if (TARGET_WINDOS)
{
            if (I64 && e.EV.Vsym.ty() & mTYthread)
                goto L5;
}
            goto L4;

        case FLdata:
        case FLudata:
        case FLgot:
        case FLgotoff:
        case FLcsdata:
        L4:
            cs.IEV2.Vsym = e.EV.Vsym;
            cs.IEV2.Voffset = e.EV.Voffset;
        L3:
            if (reg == STACK)
            {   stackchanged = 1;
                cs.Iop = 0x68;              /* PUSH immed16                 */
                cdb.genadjesp(REGSIZE);
            }
            else
            {   cs.Iop = 0xB8 + (reg & 7);  // MOV reg,immed16
                if (reg & 8)
                    cs.Irex |= REX_B;
                if (I64)
                {   cs.Irex |= REX_W;
                    if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
                    {   // LEA reg,immed32[RIP]
                        cs.Iop = LEA;
                        cs.Irm = modregrm(0,reg & 7,5);
                        if (reg & 8)
                            cs.Irex = (cs.Irex & ~REX_B) | REX_R;
                        cs.IFL1 = cast(ubyte)fl;
                        cs.IEV1.Vsym = cs.IEV2.Vsym;
                        cs.IEV1.Voffset = cs.IEV2.Voffset;
                    }
                }
            }
            cs.Iflags = CFoff;              /* want offset only             */
            cs.IFL2 = cast(ubyte)fl;
            cdb.gen(&cs);
            break;

        case FLreg:
            /* Allow this since the tree optimizer puts & in front of       */
            /* register doubles.                                            */
            goto L2;
        case FLauto:
        case FLfast:
        case FLbprel:
        case FLfltreg:
            reflocal = true;
            goto L2;
        case FLpara:
            refparam = true;
        L2:
            if (reg == STACK)
            {   regm_t retregs = ALLREGS;

                reg_t regx;
                allocreg(cdb,&retregs,&regx,TYoffset);
                reg = findreg(retregs);
                loadea(cdb,e,&cs,LEA,reg,0,0,0);    // LEA reg,EA
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                cdb.gen1(0x50 + (reg & 7));               // PUSH reg
                if (reg & 8)
                    code_orrex(cdb.last(), REX_B);
                cdb.genadjesp(REGSIZE);
                stackchanged = 1;
            }
            else
            {
                loadea(cdb,e,&cs,LEA,reg,0,0,0);   // LEA reg,EA
                if (I64)
                    code_orrex(cdb.last(), REX_W);
            }
            break;

        default:
            debug
            {
                elem_print(e);
                WRFL(fl);
            }
            assert(0);
    }
}


/******************
 * OPneg, OPsqrt, OPsin, OPcos, OPrint
 *
 * Floating point operands are handed to the complex/XMM/x87 helpers when
 * those apply; otherwise the sign bit is flipped with XOR. Integer
 * operands are negated with NEG (NEG/NEG/SBB for a register pair).
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = unary elem; operand is e.EV.E1
 *      pretregs = requested result registers; if 0, only the operand is evaluated
 */

void cdneg(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdneg()\n");
    //elem_print(e);
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    if (tyfloating(tyml))
    {
        if (tycomplex(tyml))
        {
            neg_complex87(cdb, e, pretregs);
            return;
        }
        if (tyxmmreg(tyml) && e.Eoper == OPneg && *pretregs & XMMREGS)
        {
            xmmneg(cdb,e,pretregs);
            return;
        }
        if (config.inline8087 &&
            ((*pretregs & (ALLREGS | mBP)) == 0 || e.Eoper == OPsqrt || I64))
        {
            neg87(cdb,e,pretregs);
            return;
        }
        regm_t retregs = (I16 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,retregs);
        if (I32)
        {
            const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
            cdb.genc2(0x81,modregrm(3,6,reg),0x80000000); // XOR EDX,sign bit
        }
        else
        {
            const reg = (sz == 8) ? AX : findregmsw(retregs);
            cdb.genc2(0x81,modregrm(3,6,reg),0x8000);     // XOR AX,0x8000
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    const uint isbyte = sz == 1;
    const possregs = (isbyte) ? BYTEREGS : allregs;
    regm_t retregs = *pretregs & possregs;
    if (retregs == 0)
        retregs = possregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                // retregs will be destroyed
    if (sz <= REGSIZE)
    {
        const reg = findreg(retregs);
        uint rex = (I64 && sz == 8) ? REX_W : 0;
        if (I64 && sz == 1 && reg >= 4)
            rex |= REX;
        cdb.gen2(0xF7 ^ isbyte,(rex << 16) | modregrmx(3,3,reg));   // NEG reg
        if (!I16 && _tysize[tyml] == SHORTSIZE && *pretregs & mPSW)
            cdb.last().Iflags |= CFopsize | CFpsw;
        *pretregs &= mBP | ALLREGS;             // flags already set
    }
    else if (sz == 2 * REGSIZE)
    {
        const msreg = findregmsw(retregs);
        cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
        const lsreg = findreglsw(retregs);
        cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg
        code_orflag(cdb.last(), CFpsw);           // need flag result of previous NEG
        cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}


/******************
 * Absolute value operator
 *
 * Floating point operands go to neg87() when the x87 is used inline,
 * otherwise the sign bit is cleared with AND. Integer operands use the
 * sign-mask XOR/SUB sequence, or a conditional negate for a register pair.
 * Byte operands are not supported (asserted).
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = unary elem; operand is e.EV.E1
 *      pretregs = requested result registers; if 0, only the operand is evaluated
 */


void cdabs(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdabs(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    const rex = (I64 && sz == 8) ? REX_W : 0;
    if (tyfloating(tyml))
    {
        if (config.inline8087 && ((*pretregs & (ALLREGS | mBP)) == 0 || I64))
        {
            neg87(cdb,e,pretregs);
            return;
        }
        regm_t retregs = (!I32 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,retregs);
        if (I32)
        {
            const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
            cdb.genc2(0x81,modregrm(3,4,reg),0x7FFFFFFF); // AND EDX,~sign bit
        }
        else
        {
            const reg = (sz == 8) ? AX : findregmsw(retregs);
            cdb.genc2(0x81,modregrm(3,4,reg),0x7FFF);     // AND AX,0x7FFF
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    const uint isbyte = sz == 1;
    assert(isbyte == 0);
    regm_t possregs = (sz <= REGSIZE) ? cast(regm_t) mAX : allregs;
    if (!I16 && sz == REGSIZE)
        possregs = allregs;
    regm_t retregs = *pretregs & possregs;
    if (retregs == 0)
        retregs = possregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                // retregs will be destroyed
    if (sz <= REGSIZE)
    {
        /*      CWD
                XOR     AX,DX
                SUB     AX,DX
           or:
                MOV     r,reg
                SAR     r,63
                XOR     reg,r
                SUB     reg,r
         */
        reg_t reg;
        reg_t r;

        if (!I16 && sz == REGSIZE)
        {   regm_t scratch = allregs & ~retregs;
            reg = findreg(retregs);
            allocreg(cdb,&scratch,&r,TYint);
            getregs(cdb,retregs);
            genmovreg(cdb,r,reg);                     // MOV r,reg
            cdb.genc2(0xC1,modregrmx(3,7,r),REGSIZE * 8 - 1);  // SAR r,31/63
            code_orrex(cdb.last(), rex);
        }
        else
        {
            reg = AX;
            r = DX;
            getregs(cdb,mDX);
            if (!I16 && sz == SHORTSIZE)
                cdb.gen1(0x98);                         // CWDE
            cdb.gen1(0x99);                             // CWD
            code_orrex(cdb.last(), rex);
        }
        cdb.gen2(0x33 ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // XOR reg,r
        cdb.gen2(0x2B ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // SUB reg,r
        if (!I16 && sz == SHORTSIZE && *pretregs & mPSW)
            cdb.last().Iflags |= CFopsize | CFpsw;
        if (*pretregs & mPSW)
            cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;                     // flags already set
    }
    else if (sz == 2 * REGSIZE)
    {
        /*      or      DX,DX
                jns     L2
                neg     DX
                neg     AX
                sbb     DX,0
            L2:
         */

        code *cnop = gennop(null);
        const msreg = findregmsw(retregs);
        const lsreg = findreglsw(retregs);
        genregs(cdb,0x09,msreg,msreg);            // OR msreg,msreg
        genjmp(cdb,JNS,FLcode,cast(block *)cnop);
        cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
        // NOTE(review): cdneg() marks the corresponding NEG with CFpsw; this one
        // does not - confirm whether that difference is intentional.
        cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg
        cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
        cdb.append(cnop);
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}

/**************************
 * Post increment and post decrement.
 *
 * The old value of the lvalue e.EV.E1 is loaded as the result and the
 * lvalue is then updated in place by the constant e.EV.E2. When no
 * result is wanted the work is delegated to cdaddass().
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = OPpostinc or OPpostdec elem
 *      pretregs = requested result registers
 */

void cdpost(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpost(pretregs = %s)\n", regm_str(*pretregs));
    code cs = void;
    const op = e.Eoper;                      // OPxxxx
    if (*pretregs == 0)                        // if nothing to return
    {
        cdaddass(cdb,e,pretregs);
        return;
    }
    const tym_t tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    elem *e2 = e.EV.E2;
    const rex = (I64 && sz == 8) ? REX_W : 0;

    if (tyfloating(tyml))
    {
        if (config.fpxmmregs && tyxmmreg(tyml) &&
            !tycomplex(tyml) // SIMD code is not set up to deal with complex
           )
        {
            xmmpost(cdb,e,pretregs);
            return;
        }

        if (config.inline8087)
        {
            post87(cdb,e,pretregs);
            return;
        }
static if (TARGET_WINDOS)
{
        assert(sz <= 8);
        getlvalue(cdb,&cs,e.EV.E1,DOUBLEREGS);
        freenode(e.EV.E1);
        regm_t idxregs = idxregm(&cs);  // mask of index regs used
        cs.Iop = 0x8B;                  /* MOV DOUBLEREGS,EA            */
        fltregs(cdb,&cs,tyml);
        stackchanged = 1;
        int stackpushsave = stackpush;
        regm_t retregs;
        if (sz == 8)
        {
            if (I32)
            {
                cdb.gen1(0x50 + DX);             // PUSH DOUBLEREGS
                cdb.gen1(0x50 + AX);
                stackpush += DOUBLESIZE;
                retregs = DOUBLEREGS2_32;
            }
            else
            {
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                stackpush += DOUBLESIZE + DOUBLESIZE;

                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                retregs = DOUBLEREGS_16;
            }
        }
        else
        {
            stackpush += FLOATSIZE;     /* so we know something is on   */
            if (!I32)
                cdb.gen1(0x50 + DX);
            cdb.gen1(0x50 + AX);
            retregs = FLOATREGS2;
        }
        cdb.genadjesp(stackpush - stackpushsave);

        cgstate.stackclean++;
        scodelem(cdb,e2,&retregs,idxregs,false);
        cgstate.stackclean--;

        if (tyml == TYdouble || tyml == TYdouble_alias)
        {
            retregs = DOUBLEREGS;
            callclib(cdb,e,(op == OPpostinc) ? CLIB.dadd : CLIB.dsub,
                    &retregs,idxregs);
        }
        else /* tyml == TYfloat */
        {
            retregs = FLOATREGS;
            callclib(cdb,e,(op == OPpostinc) ? CLIB.fadd : CLIB.fsub,
                    &retregs,idxregs);
        }
        cs.Iop = 0x89;                  /* MOV EA,DOUBLEREGS            */
        fltregs(cdb,&cs,tyml);
        stackpushsave = stackpush;
        if (tyml == TYdouble || tyml == TYdouble_alias)
        {   if (*pretregs == mSTACK)
                retregs = mSTACK;       /* leave result on stack        */
            else
            {
                if (I32)
                {
                    cdb.gen1(0x58 + AX);
                    cdb.gen1(0x58 + DX);
                }
                else
                {
                    cdb.gen1(0x58 + DX);
                    cdb.gen1(0x58 + CX);
                    cdb.gen1(0x58 + BX);
                    cdb.gen1(0x58 + AX);
                }
                stackpush -= DOUBLESIZE;
                retregs = DOUBLEREGS;
            }
        }
        else
        {
            cdb.gen1(0x58 + AX);
            if (!I32)
                cdb.gen1(0x58 + DX);
            stackpush -= FLOATSIZE;
            retregs = FLOATREGS;
        }
        cdb.genadjesp(stackpush - stackpushsave);
        fixresult(cdb,e,retregs,pretregs);
        return;
}
    }
    if (tyxmmreg(tyml))
    {
        xmmpost(cdb,e,pretregs);
        return;
    }

    assert(e2.Eoper == OPconst);
    uint isbyte = (sz == 1);
    regm_t possregs = isbyte ? BYTEREGS : allregs;
    getlvalue(cdb,&cs,e.EV.E1,0);
    freenode(e.EV.E1);
    regm_t idxregs = idxregm(&cs);       // mask of index regs used
    if (sz <= REGSIZE && *pretregs == mPSW && (cs.Irm & 0xC0) == 0xC0 &&
        (!I16 || (idxregs & (mBX | mSI | mDI | mBP))))
    {
        // Generate:
        //      TEST    reg,reg
        //      LEA     reg,n[reg]      // don't affect flags
        reg_t reg = cs.Irm & 7;
        if (cs.Irex & REX_B)
            reg |= 8;
        cs.Iop = 0x85 ^ isbyte;
        code_newreg(&cs, reg);
        cs.Iflags |= CFpsw;
        cdb.gen(&cs);             // TEST reg,reg

        // If lvalue is a register variable, we must mark it as modified
        modEA(cdb,&cs);

        auto n = e2.EV.Vint;
        if (op == OPpostdec)
            n = -n;
        int rm = reg;
        if (I16)
        {
            static immutable byte[8] regtorm = [ -1,-1,-1, 7,-1, 6, 4, 5 ]; // copied from cod1.c
            rm = regtorm[reg];
        }
        cdb.genc1(LEA,(rex << 16) | buildModregrm(2,reg,rm),FLconst,n); // LEA reg,n[reg]
        return;
    }
    else if (sz <= REGSIZE || tyfv(tyml))
    {
        code cs2 = void;

        cs.Iop = 0x8B ^ isbyte;
        regm_t retregs = possregs & ~idxregs & *pretregs;
        if (!tyfv(tyml))
        {
            if (retregs == 0)
                retregs = possregs & ~idxregs;
        }
        else /* tyfv(tyml) */
        {
            if ((retregs &= mLSW) == 0)
                retregs = mLSW & ~idxregs;
            /* Can't use LES if the EA uses ES as a seg override    */
            if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
            {   cs.Iop = 0xC4;                      /* LES          */
                getregs(cdb,mES);           // allocate ES
            }
        }
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        code_newreg(&cs, reg);
        if (sz == 1 && I64 && reg >= 4)
            cs.Irex |= REX;
        cdb.gen(&cs);                     // MOV reg,EA
        cs2 = cs;

        /* If lvalue is a register variable, we must mark it as modified */
        modEA(cdb,&cs);

        cs.Iop = 0x81 ^ isbyte;
        cs.Irm &= ~cast(int)modregrm(0,7,0);             // reg field = 0
        cs.Irex &= ~REX_R;
        if (op == OPpostdec)
            cs.Irm |= modregrm(0,5,0);  /* SUB                  */
        cs.IFL2 = FLconst;
        targ_int n = e2.EV.Vint;
        cs.IEV2.Vint = n;
        if (n == 1)                     /* can use INC or DEC   */
        {
            cs.Iop |= 0xFE;             /* xFE is dec byte, xFF is word */
            if (op == OPpostdec)
                NEWREG(cs.Irm,1);       // DEC EA
            else
                NEWREG(cs.Irm,0);       // INC EA
        }
        else if (n == -1)               // can use INC or DEC
        {
            cs.Iop |= 0xFE;             // xFE is dec byte, xFF is word
            if (op == OPpostinc)
                NEWREG(cs.Irm,1);       // DEC EA
            else
                NEWREG(cs.Irm,0);       // INC EA
        }

        // For scheduling purposes, we wish to replace:
        //      MOV     reg,EA
        //      OP      EA
        // with:
        //      MOV     reg,EA
        //      OP      reg
        //      MOV     EA,reg
        //      ~OP     reg
        if (sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 &&
            config.target_cpu >= TARGET_Pentium &&
            config.flags4 & CFG4speed)
        {
            // Replace EA in cs with reg
            cs.Irm = (cs.Irm & ~cast(int)modregrm(3,0,7)) | modregrm(3,0,reg & 7);
            if (reg & 8)
            {   cs.Irex &= ~REX_R;
                cs.Irex |= REX_B;
            }
            else
                cs.Irex &= ~REX_B;
            if (I64 && sz == 1 && reg >= 4)
                cs.Irex |= REX;
            cdb.gen(&cs);                        // ADD/SUB reg,const

            // Reverse MOV direction
            cs2.Iop ^= 2;
            cdb.gen(&cs2);                       // MOV EA,reg

            // Toggle INC <-> DEC, ADD <-> SUB
            cs.Irm ^= (n == 1 || n == -1) ? modregrm(0,1,0) : modregrm(0,5,0);
            cdb.gen(&cs);

            if (*pretregs & mPSW)
            {   *pretregs &= ~mPSW;              // flags already set
                code_orflag(cdb.last(),CFpsw);
            }
        }
        else
            cdb.gen(&cs);                        // ADD/SUB EA,const

        freenode(e2);
        if (tyfv(tyml))
        {
            reg_t preg;

            getlvalue_msw(&cs);
            if (*pretregs & mES)
            {
                preg = ES;
                /* ES is already loaded if CFes is 0            */
                cs.Iop = ((cs.Iflags & CFSEG) == CFes) ? 0x8E : NOP;
                NEWREG(cs.Irm,0);       /* MOV ES,EA+2          */
            }
            else
            {
                regm_t retregsx = *pretregs & mMSW;
                if (!retregsx)
                    retregsx = mMSW;
                allocreg(cdb,&retregsx,&preg,TYint);
                cs.Iop = 0x8B;
                if (I32)
                    cs.Iflags |= CFopsize;
                NEWREG(cs.Irm,preg);    /* MOV preg,EA+2        */
            }
            getregs(cdb,mask(preg));
            cdb.gen(&cs);
            retregs = mask(reg) | mask(preg);
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (tyml == TYhptr)
    {
        uint rvalue;
        reg_t lreg;
        reg_t rtmp;
        regm_t mtmp;

        rvalue = e2.EV.Vlong;
        freenode(e2);

        // If h--, convert to h++
        if (e.Eoper == OPpostdec)
            rvalue = -rvalue;

        regm_t retregs = mLSW & ~idxregs & *pretregs;
        if (!retregs)
            retregs = mLSW & ~idxregs;
        allocreg(cdb,&retregs,&lreg,TYint);

        // Can't use LES if the EA uses ES as a seg override
        if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
        {   cs.Iop = 0xC4;
            retregs |= mES;
            getregs(cdb,mES|mCX);       // allocate ES
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);               // LES lreg,EA
        }
        else
        {   cs.Iop = 0x8B;
            retregs |= mDX;
            getregs(cdb,mDX|mCX);
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);               // MOV lreg,EA
            NEWREG(cs.Irm,DX);
            getlvalue_msw(&cs);
            cdb.gen(&cs);               // MOV DX,EA+2
            getlvalue_lsw(&cs);
        }

        // Allocate temporary register, rtmp
        mtmp = ALLREGS & ~mCX & ~idxregs & ~retregs;
        allocreg(cdb,&mtmp,&rtmp,TYint);

        movregconst(cdb,rtmp,rvalue >> 16,0);   // MOV rtmp,e2+2
        getregs(cdb,mtmp);
        cs.Iop = 0x81;
        NEWREG(cs.Irm,0);
        cs.IFL2 = FLconst;
        cs.IEV2.Vint = rvalue;
        cdb.gen(&cs);                           // ADD EA,e2
        code_orflag(cdb.last(),CFpsw);
        cdb.genc2(0x81,modregrm(3,2,rtmp),0);   // ADC rtmp,0
        genshift(cdb);                          // MOV CX,offset __AHSHIFT
        cdb.gen2(0xD3,modregrm(3,4,rtmp));      // SHL rtmp,CL
        cs.Iop = 0x01;
        NEWREG(cs.Irm,rtmp);                    // ADD EA+2,rtmp
        getlvalue_msw(&cs);
        cdb.gen(&cs);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (sz == 2 * REGSIZE)
    {
        regm_t retregs = allregs & ~idxregs & *pretregs;
        if ((retregs & mLSW) == 0)
            retregs |= mLSW & ~idxregs;
        if ((retregs & mMSW) == 0)
            retregs |= ALLREGS & mMSW;
        assert(retregs & mMSW && retregs & mLSW);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,tyml);
        uint sreg = findreglsw(retregs);
        cs.Iop = 0x8B;
        cs.Irm |= modregrm(0,sreg,0);
        cdb.gen(&cs);                   // MOV sreg,EA
        NEWREG(cs.Irm,reg);
        getlvalue_msw(&cs);
        cdb.gen(&cs);                   // MOV reg,EA+2
        cs.Iop = 0x81;
        cs.Irm &= ~cast(int)modregrm(0,7,0);     /* reg field = 0 for ADD        */
        if (op == OPpostdec)
            cs.Irm |= modregrm(0,5,0);  /* SUB                          */
        getlvalue_lsw(&cs);
        cs.IFL2 = FLconst;
        cs.IEV2.Vlong = e2.EV.Vlong;
        cdb.gen(&cs);                   // ADD/SUB EA,const
        code_orflag(cdb.last(),CFpsw);
        getlvalue_msw(&cs);
        cs.IEV2.Vlong = 0;
        if (op == OPpostinc)
            cs.Irm ^= modregrm(0,2,0);  /* ADC                          */
        else
            cs.Irm ^= modregrm(0,6,0);  /* SBB                          */
        cs.IEV2.Vlong = cast(targ_long)(e2.EV.Vullong >> (REGSIZE * 8));
        cdb.gen(&cs);                   // ADC/SBB EA+2,upper half of const
        freenode(e2);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else
    {
        assert(0);
    }
}


/*****************************************
 * Handler for operators that must never reach the code generator:
 * prints the elem in debug builds, then asserts.
 */

void cderr(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
        elem_print(e);

    //printf("op = %d, %d\n", e.Eoper, OPstring);
    //printf("string = %p, len = %d\n", e.EV.ss.Vstring, e.EV.ss.Vstrlen);
    //printf("string = '%.*s'\n", cast(int)e.EV.ss.Vstrlen, e.EV.ss.Vstring);
    assert(0);
}

/*****************************************
 * Generate code for an info elem, dispatching on the operator of
 * e.EV.E1. Which operators are accepted depends on the MARS / SCPP
 * version; anything else asserts.
 */

void cdinfo(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    switch (e.EV.E1.Eoper)
    {
version (MARS)
{
        case OPdctor:
            codelem(cdb,e.EV.E2,pretregs,false);
            regm_t retregs = 0;
            codelem(cdb,e.EV.E1,&retregs,false);
            break;
}
version (SCPP)
{
        case OPdtor:
            cdcomma(cdb,e,pretregs);
            break;
        case OPctor:
            codelem(cdb,e.EV.E2,pretregs,false);
            regm_t retregs = 0;
            codelem(cdb,e.EV.E1,&retregs,false);
            break;
        case OPmark:
            if (0 && config.exe == EX_WIN32)
            {
                const idx = except_index_get();
                except_mark();
                codelem(cdb,e.EV.E2,pretregs,false);
                if (config.exe == EX_WIN32 && idx != except_index_get())
                {   usednteh |= NTEHcleanup;
                    nteh_gensindex(cdb,idx - 1);
                }
                except_release();
                assert(idx == except_index_get());
            }
            else
            {
                code cs = void;
                cs.Iop = ESCAPE | ESCmark;
                cs.Iflags = 0;
                cs.Irex = 0;
                cdb.gen(&cs);
                codelem(cdb,e.EV.E2,pretregs,false);
                cs.Iop = ESCAPE | ESCrelease;
                cdb.gen(&cs);
            }
            freenode(e.EV.E1);
            break;
}
        default:
            assert(0);
    }
}

/*******************************************
 * D constructor.
 * Params:
 *      cdb = code builder the instructions are appended to
 *      e = elem recorded in the escape instruction that marks the start of the EH range
 *      pretregs = must be 0 (asserted)
 */

void cddctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Generate:
        ESCAPE | ESCdctor
        MOV     sindex[BP],index
     */
    usednteh |= EHcleanup;
    if (config.ehmethod == EHmethod.EH_WIN32)
    {   usednteh |= NTEHcleanup | NTEH_try;
        nteh_usevars();
    }
    assert(*pretregs == 0);
    code cs;
    cs.Iop = ESCAPE | ESCdctor;         // mark start of EH range
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL1 = FLctor;
    cs.IEV1.Vtor = e;
    cdb.gen(&cs);
    nteh_gensindex(cdb,0);              // the actual index will be patched in later
                                        // by except_fillInEHTable()
}

/*******************************************
 * D destructor.
*/

/*******************************************
 * Emits the ESCddtor escape for elem e and then the code for the
 * destructor expression e.EV.E1.
 *
 * With EH_DWARF the destructor code is generated inline right after
 * the escape. Otherwise it is generated as a local subroutine:
 *
 *      ESCAPE | ESCddtor
 *      MOV     sindex[BP],index
 *      CALL    Ldtor
 *      JMP     L1
 * Ldtor:
 *      ... e.EV.E1 ...
 *      RET
 * L1:  NOP
 *
 * Params:
 *      cdb = code sink
 *      e = the elem; stored in the escape's IEV1.Vtor
 *      pretregs = requested result registers; must be 0
 */

void cdddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    const bool dwarfEH = config.ehmethod == EHmethod.EH_DWARF;

    // Bookkeeping shared by every EH method
    usednteh |= EHcleanup;
    if (config.ehmethod == EHmethod.EH_WIN32)   // never true when dwarfEH is
    {
        usednteh |= NTEHcleanup | NTEH_try;
        nteh_usevars();
    }

    // The escape marks the end of the EH range (and, for DWARF, where
    // the landing pad is)
    code marker;
    marker.Iop = ESCAPE | ESCddtor;
    marker.Iflags = 0;
    marker.Irex = 0;
    marker.IFL1 = FLdtor;
    marker.IEV1.Vtor = e;
    cdb.gen(&marker);

    if (dwarfEH)
    {
        // Mark all registers as destroyed
        getregsNoSave(allregs);

        assert(*pretregs == 0);
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    // The actual index will be patched in later by except_fillInEHTable()
    nteh_gensindex(cdb,0);

    // Mark all registers as destroyed
    getregsNoSave(allregs);

    assert(*pretregs == 0);

    // Build the destructor subroutine in its own builder: body, then RET
    CodeBuilder dtorBuilder;
    dtorBuilder.ctor();
    codelem(dtorBuilder,e.EV.E1,pretregs,false);
    dtorBuilder.gen1(0xC3);
    code *dtorEntry = dtorBuilder.finish();

    // Adjust the stack around the CALL when STACKALIGN is 16 or more
    int pad = 0;
    if (STACKALIGN >= 16)
    {
        pad = STACKALIGN - REGSIZE;
        cod3_stackadj(cdb, pad);
    }
    calledafunc = 1;
    genjmp(cdb,0xE8,FLcode,cast(block *)dtorEntry);     // CALL Ldtor
    if (pad)
        cod3_stackadj(cdb, -pad);

    // JMP over the subroutine to a NOP placed after it
    code *skipTarget = gennop(null);

    genjmp(cdb,JMP,FLcode,cast(block *)skipTarget);
    cdb.append(dtorBuilder);
    cdb.append(skipTarget);
}


/*******************************************
 * C++ constructor.
*/

void cdctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Only the SCPP build generates anything here; otherwise this is a no-op.
    version (SCPP)
    {
        // EHcleanup always; NTEHcleanup as well when targeting EX_WIN32
        usednteh |= (config.exe == EX_WIN32) ? (EHcleanup | NTEHcleanup)
                                             : EHcleanup;
        assert(*pretregs == 0);

        // Emit the ESCctor escape carrying e
        code marker = void;
        marker.Iop = ESCAPE | ESCctor;
        marker.Iflags = 0;
        marker.Irex = 0;
        marker.IFL1 = FLctor;
        marker.IEV1.Vtor = e;
        cdb.gen(&marker);
    }
}

/*******************************************
 * C++ destructor.
 * Counterpart of cdctor: emits the ESCdtor escape carrying e.
 */

void cddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Only the SCPP build generates anything here; otherwise this is a no-op.
    version (SCPP)
    {
        // EHcleanup always; NTEHcleanup as well when targeting EX_WIN32
        usednteh |= (config.exe == EX_WIN32) ? (EHcleanup | NTEHcleanup)
                                             : EHcleanup;
        assert(*pretregs == 0);

        code marker = void;
        marker.Iop = ESCAPE | ESCdtor;
        marker.Iflags = 0;
        marker.Irex = 0;
        marker.IFL1 = FLdtor;
        marker.IEV1.Vtor = e;
        cdb.gen(&marker);
    }
}

/*******************************************
 * Intentionally generates no code.
 */

void cdmark(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
}

static if (!NTEXCEPTIONS)
{
/*******************************************
 * Stub used when NTEXCEPTIONS is off: must never be called.
 */
void cdsetjmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(0);
}
}

/*****************************************
 * Generate code for the operand e.EV.E1; no result may be requested.
 */

void cdvoid(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);
    elem *operand = e.EV.E1;
    codelem(cdb,operand,pretregs,false);
}

/*****************************************
 * Emit a single halting instruction: UD2 when the target CPU is at
 * least TARGET_80286, INT3 otherwise. No result may be requested.
 */

void cdhalt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);
    if (config.target_cpu >= TARGET_80286)
        cdb.gen1(UD2);
    else
        cdb.gen1(INT3);
}

}