/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1984-1998 by Symantec
 *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod2.d, backend/cod2.d)
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod2.d
 */

module dmd.backend.cod2;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.oper;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

extern (C++):

nothrow:

int REGSIZE();

extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl;

__gshared int cdcmp_flag;

/// Returns: a mask with only bit number `m` set.
private extern (D) uint mask(uint m)
{
    return 1u << m;
}

// from divcoeff.c
extern (C)
{
    bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
    bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost);
}

/*******************************
 * Exchange the contents of two register variables.
 * Params:
 *      a = first register, receives the value of `*b`
 *      b = second register, receives the value of `*a`
 */

private void swap(reg_t *a,reg_t *b)
{
    const saved = *b;
    *b = *a;
    *a = saved;
}


/*******************************************
 * Params:
 *      e = elem to examine
 * Returns: true if cannot use this EA in anything other than a MOV instruction.
 */

bool movOnly(const elem *e)
{
    // Only OPvar operands of PIC code for OSX64 are restricted
    if (!((config.exe & EX_OSX64) && (config.flags3 & CFG3pic) && e.Eoper == OPvar))
        return false;

    // Fixups for these storage classes can only be done with a MOV
    const sc = e.EV.Vsym.Sclass;
    return sc == SCglobal || sc == SCextern ||
           sc == SCcomdat || sc == SCcomdef;
}

/********************************
 * Determine index registers used by addressing mode.
 * Index is rm of modregrm field.
 * Params:
 *      c = instruction whose Irm/Isib/Irex fields are decoded
 * Returns:
 *      mask of index registers
 */

regm_t idxregm(const code* c)
{
    const modrm = c.Irm;

    // mod == 3: the operand is a register, so no index registers are used
    if ((modrm & 0xC0) == 0xC0)
        return 0;

    if (I16)
    {
        // 16 bit addressing modes, indexed by the r/m field
        static immutable ubyte[8] idxrm = [mBX|mSI,mBX|mDI,mSI,mDI,mSI,mDI,0,mBX];
        return idxrm[modrm & 7];
    }

    const rexB = (c.Irex & REX_B) ? 8 : 0;
    if ((modrm & 7) != 4)                       // no sib byte
        return mask((modrm & 7) | rexB);

    // sib byte present
    const sib = c.Isib;
    const rexX = (c.Irex & REX_X) ? 8 : 0;
    regm_t used = mask(((sib >> 3) & 7) | rexX);    // scaled index reg

    // base == 5 with mod == 0 contributes no base register to the mask
    const noBase = (sib & 7) == 5 && (modrm & 0xC0) == 0;
    if (!noBase)
        used |= mask((sib & 7) | rexB);
    return used;
}


static if (TARGET_WINDOS)
{
/***************************
 * Gen code for call to floating point routine.
136 */ 137 138 void opdouble(ref CodeBuilder cdb, elem *e,regm_t *pretregs,uint clib) 139 { 140 if (config.inline8087) 141 { 142 orth87(cdb,e,pretregs); 143 return; 144 } 145 146 regm_t retregs1,retregs2; 147 if (tybasic(e.EV.E1.Ety) == TYfloat) 148 { 149 clib += CLIB.fadd - CLIB.dadd; /* convert to float operation */ 150 retregs1 = FLOATREGS; 151 retregs2 = FLOATREGS2; 152 } 153 else 154 { 155 if (I32) 156 { retregs1 = DOUBLEREGS_32; 157 retregs2 = DOUBLEREGS2_32; 158 } 159 else 160 { retregs1 = mSTACK; 161 retregs2 = DOUBLEREGS_16; 162 } 163 } 164 165 codelem(cdb,e.EV.E1, &retregs1,false); 166 if (retregs1 & mSTACK) 167 cgstate.stackclean++; 168 scodelem(cdb,e.EV.E2, &retregs2, retregs1 & ~mSTACK, false); 169 if (retregs1 & mSTACK) 170 cgstate.stackclean--; 171 callclib(cdb, e, clib, pretregs, 0); 172 } 173 } 174 175 /***************************** 176 * Handle operators which are more or less orthogonal 177 * ( + - & | ^ ) 178 */ 179 180 void cdorth(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 181 { 182 //printf("cdorth(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); 183 elem *e1 = e.EV.E1; 184 elem *e2 = e.EV.E2; 185 if (*pretregs == 0) // if don't want result 186 { 187 codelem(cdb,e1,pretregs,false); // eval left leaf 188 *pretregs = 0; // in case they got set 189 codelem(cdb,e2,pretregs,false); 190 return; 191 } 192 193 const ty = tybasic(e.Ety); 194 const ty1 = tybasic(e1.Ety); 195 196 if (tyfloating(ty1)) 197 { 198 if (tyvector(ty1) || 199 config.fpxmmregs && tyxmmreg(ty1) && 200 !(*pretregs & mST0) && 201 !(*pretregs & mST01) && 202 !(ty == TYldouble || ty == TYildouble) // watch out for shrinkLongDoubleConstantIfPossible() 203 ) 204 { 205 orthxmm(cdb,e,pretregs); 206 return; 207 } 208 if (config.inline8087) 209 { 210 orth87(cdb,e,pretregs); 211 return; 212 } 213 static if (TARGET_WINDOS) 214 { 215 opdouble(cdb,e,pretregs,(e.Eoper == OPadd) ? 
CLIB.dadd 216 : CLIB.dsub); 217 return; 218 } 219 else 220 { 221 assert(0); 222 } 223 } 224 if (tyxmmreg(ty1)) 225 { 226 orthxmm(cdb,e,pretregs); 227 return; 228 } 229 230 opcode_t op1, op2; 231 uint mode; 232 __gshared int nest; 233 234 const ty2 = tybasic(e2.Ety); 235 const e2oper = e2.Eoper; 236 const sz = _tysize[ty]; 237 const isbyte = (sz == 1); 238 code_flags_t word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0; 239 bool test = false; // assume we destroyed lvalue 240 241 switch (e.Eoper) 242 { 243 case OPadd: mode = 0; 244 op1 = 0x03; op2 = 0x13; break; /* ADD, ADC */ 245 case OPmin: mode = 5; 246 op1 = 0x2B; op2 = 0x1B; break; /* SUB, SBB */ 247 case OPor: mode = 1; 248 op1 = 0x0B; op2 = 0x0B; break; /* OR , OR */ 249 case OPxor: mode = 6; 250 op1 = 0x33; op2 = 0x33; break; /* XOR, XOR */ 251 case OPand: mode = 4; 252 op1 = 0x23; op2 = 0x23; /* AND, AND */ 253 if (tyreg(ty1) && 254 *pretregs == mPSW) /* if flags only */ 255 { 256 test = true; 257 op1 = 0x85; /* TEST */ 258 mode = 0; 259 } 260 break; 261 262 default: 263 assert(0); 264 } 265 op1 ^= isbyte; /* if byte operation */ 266 267 // Compute numwords, the number of words to operate on. 268 int numwords = 1; 269 if (!I16) 270 { 271 /* Cannot operate on longs and then do a 'paint' to a far */ 272 /* pointer, because far pointers are 48 bits and longs are 32. */ 273 /* Therefore, numwords can never be 2. */ 274 assert(!(tyfv(ty1) && tyfv(ty2))); 275 if (sz == 2 * REGSIZE) 276 { 277 numwords++; 278 } 279 } 280 else 281 { 282 /* If ty is a TYfptr, but both operands are long, treat the */ 283 /* operation as a long. 
*/ 284 if ((tylong(ty1) || ty1 == TYhptr) && 285 (tylong(ty2) || ty2 == TYhptr)) 286 numwords++; 287 } 288 289 // Special cases where only flags are set 290 if (test && _tysize[ty1] <= REGSIZE && 291 (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount)) 292 && !movOnly(e1) 293 ) 294 { 295 // Handle the case of (var & const) 296 if (e2.Eoper == OPconst && el_signx32(e2)) 297 { 298 code cs = void; 299 cs.Iflags = 0; 300 cs.Irex = 0; 301 getlvalue(cdb,&cs,e1,0); 302 targ_size_t value = e2.EV.Vpointer; 303 if (sz == 2) 304 value &= 0xFFFF; 305 else if (sz == 4) 306 value &= 0xFFFFFFFF; 307 reg_t reg; 308 if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,value,®)) 309 { 310 code_newreg(&cs, reg); 311 if (I64 && isbyte && reg >= 4) 312 cs.Irex |= REX; 313 } 314 else 315 { 316 if (sz == 8 && !I64) 317 { 318 assert(value == cast(int)value); // sign extend imm32 319 } 320 op1 = 0xF7; 321 cs.IEV2.Vint = cast(targ_int)value; 322 cs.IFL2 = FLconst; 323 } 324 cs.Iop = op1 ^ isbyte; 325 cs.Iflags |= word | CFpsw; 326 freenode(e1); 327 freenode(e2); 328 cdb.gen(&cs); 329 return; 330 } 331 332 // Handle (exp & reg) 333 reg_t reg; 334 regm_t retregs; 335 if (isregvar(e2,&retregs,®)) 336 { 337 code cs = void; 338 cs.Iflags = 0; 339 cs.Irex = 0; 340 getlvalue(cdb,&cs,e1,0); 341 code_newreg(&cs, reg); 342 if (I64 && isbyte && reg >= 4) 343 cs.Irex |= REX; 344 cs.Iop = op1 ^ isbyte; 345 cs.Iflags |= word | CFpsw; 346 freenode(e1); 347 freenode(e2); 348 cdb.gen(&cs); 349 return; 350 } 351 } 352 353 code cs = void; 354 cs.Iflags = 0; 355 cs.Irex = 0; 356 357 // Look for possible uses of LEA 358 if (e.Eoper == OPadd && 359 !(*pretregs & mPSW) && // flags aren't set by LEA 360 !nest && // could cause infinite recursion if e.Ecount 361 (sz == REGSIZE || (I64 && sz == 4))) // far pointers aren't handled 362 { 363 const rex = (sz == 8) ? 
REX_W : 0; 364 365 // Handle the case of (e + &var) 366 int e1oper = e1.Eoper; 367 if ((e2oper == OPrelconst && (config.target_cpu >= TARGET_Pentium || (!e2.Ecount && stackfl[el_fl(e2)]))) 368 || // LEA costs too much for simple EAs on older CPUs 369 (e2oper == OPconst && (e1.Eoper == OPcall || e1.Eoper == OPcallns) && !(*pretregs & mAX)) || 370 (!I16 && (isscaledindex(e1) || isscaledindex(e2))) || 371 (!I16 && e1oper == OPvar && e1.EV.Vsym.Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2.EV.Vsym.Sfl == FLreg))) || 372 (e2oper == OPconst && e1oper == OPeq && e1.EV.E1.Eoper == OPvar) || 373 (!I16 && (e2oper == OPrelconst || e2oper == OPconst) && !e1.Ecount && 374 (e1oper == OPmul || e1oper == OPshl) && 375 e1.EV.E2.Eoper == OPconst && 376 ssindex(e1oper,e1.EV.E2.EV.Vuns) 377 ) || 378 (!I16 && e1.Ecount) 379 ) 380 { 381 const inc = e.Ecount != 0; 382 nest += inc; 383 code csx = void; 384 getlvalue(cdb,&csx,e,0); 385 nest -= inc; 386 reg_t regx; 387 allocreg(cdb,pretregs,®x,ty); 388 csx.Iop = LEA; 389 code_newreg(&csx, regx); 390 cdb.gen(&csx); // LEA regx,EA 391 if (rex) 392 code_orrex(cdb.last(), rex); 393 return; 394 } 395 396 // Handle the case of ((e + c) + e2) 397 if (!I16 && 398 e1oper == OPadd && 399 (e1.EV.E2.Eoper == OPconst && el_signx32(e1.EV.E2) || 400 e2oper == OPconst && el_signx32(e2)) && 401 !e1.Ecount 402 ) 403 { 404 elem *ebase; 405 elem *edisp; 406 if (e2oper == OPconst && el_signx32(e2)) 407 { edisp = e2; 408 ebase = e1.EV.E2; 409 } 410 else 411 { edisp = e1.EV.E2; 412 ebase = e2; 413 } 414 415 auto e11 = e1.EV.E1; 416 regm_t retregs = *pretregs & ALLREGS; 417 if (!retregs) 418 retregs = ALLREGS; 419 int ss = 0; 420 int ss2 = 0; 421 422 // Handle the case of (((e * c1) + c2) + e2) 423 // Handle the case of (((e << c1) + c2) + e2) 424 if ((e11.Eoper == OPmul || e11.Eoper == OPshl) && 425 e11.EV.E2.Eoper == OPconst && 426 !e11.Ecount 427 ) 428 { 429 const co1 = cast(targ_size_t)el_tolong(e11.EV.E2); 430 if (e11.Eoper == OPshl) 431 { 432 
if (co1 > 3) 433 goto L13; 434 ss = cast(int)co1; 435 } 436 else 437 { 438 ss2 = 1; 439 switch (co1) 440 { 441 case 6: ss = 1; break; 442 case 12: ss = 1; ss2 = 2; break; 443 case 24: ss = 1; ss2 = 3; break; 444 case 10: ss = 2; break; 445 case 20: ss = 2; ss2 = 2; break; 446 case 40: ss = 2; ss2 = 3; break; 447 case 18: ss = 3; break; 448 case 36: ss = 3; ss2 = 2; break; 449 case 72: ss = 3; ss2 = 3; break; 450 default: 451 ss2 = 0; 452 goto L13; 453 } 454 } 455 freenode(e11.EV.E2); 456 freenode(e11); 457 e11 = e11.EV.E1; 458 L13: 459 { } 460 } 461 462 reg_t reg11; 463 regm_t regm; 464 if (e11.Eoper == OPvar && isregvar(e11,®m,®11)) 465 { 466 if (tysize(e11.Ety) <= REGSIZE) 467 retregs = mask(reg11); // only want the LSW 468 else 469 retregs = regm; 470 freenode(e11); 471 } 472 else 473 codelem(cdb,e11,&retregs,false); 474 475 regm_t rretregs = ALLREGS & ~retregs & ~mBP; 476 scodelem(cdb,ebase,&rretregs,retregs,true); 477 reg_t reg; 478 { 479 regm_t sregs = *pretregs & ~rretregs; 480 if (!sregs) 481 sregs = ALLREGS & ~rretregs; 482 allocreg(cdb,&sregs,®,ty); 483 } 484 485 assert((retregs & (retregs - 1)) == 0); // must be only one register 486 assert((rretregs & (rretregs - 1)) == 0); // must be only one register 487 488 auto reg1 = findreg(retregs); 489 const reg2 = findreg(rretregs); 490 491 if (ss2) 492 { 493 assert(reg != reg2); 494 if ((reg1 & 7) == BP) 495 { static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1]; 496 497 // IMUL reg,imm32 498 cdb.genc2(0x69,modregxrmx(3,reg,reg1),imm32[ss]); 499 } 500 else 501 { // LEA reg,[reg1*ss][reg1] 502 cdb.gen2sib(LEA,modregxrm(0,reg,4),modregrm(ss,reg1 & 7,reg1 & 7)); 503 if (reg1 & 8) 504 code_orrex(cdb.last(), REX_X | REX_B); 505 } 506 if (rex) 507 code_orrex(cdb.last(), rex); 508 reg1 = reg; 509 ss = ss2; // use *2 for scale 510 } 511 512 cs.Iop = LEA; // LEA reg,c[reg1*ss][reg2] 513 cs.Irm = modregrm(2,reg & 7,4); 514 cs.Isib = modregrm(ss,reg1 & 7,reg2 & 7); 515 assert(reg2 != BP); 516 cs.Iflags = CFoff; 517 cs.Irex 
= cast(ubyte)rex; 518 if (reg & 8) 519 cs.Irex |= REX_R; 520 if (reg1 & 8) 521 cs.Irex |= REX_X; 522 if (reg2 & 8) 523 cs.Irex |= REX_B; 524 cs.IFL1 = FLconst; 525 cs.IEV1.Vsize_t = edisp.EV.Vuns; 526 527 freenode(edisp); 528 freenode(e1); 529 cdb.gen(&cs); 530 fixresult(cdb,e,mask(reg),pretregs); 531 return; 532 } 533 } 534 535 regm_t posregs = (isbyte) ? BYTEREGS : (mES | ALLREGS | mBP); 536 regm_t retregs = *pretregs & posregs; 537 if (retregs == 0) /* if no return regs speced */ 538 /* (like if wanted flags only) */ 539 retregs = ALLREGS & posregs; // give us some 540 541 if (ty1 == TYhptr || ty2 == TYhptr) 542 { /* Generate code for add/subtract of huge pointers. 543 No attempt is made to generate very good code. 544 */ 545 retregs = (retregs & mLSW) | mDX; 546 regm_t rretregs; 547 if (ty1 == TYhptr) 548 { // hptr +- long 549 rretregs = mLSW & ~(retregs | regcon.mvar); 550 if (!rretregs) 551 rretregs = mLSW; 552 rretregs |= mCX; 553 codelem(cdb,e1,&rretregs,0); 554 retregs &= ~rretregs; 555 if (!(retregs & mLSW)) 556 retregs |= mLSW & ~rretregs; 557 558 scodelem(cdb,e2,&retregs,rretregs,true); 559 } 560 else 561 { // long + hptr 562 codelem(cdb,e1,&retregs,0); 563 rretregs = (mLSW | mCX) & ~retregs; 564 if (!(rretregs & mLSW)) 565 rretregs |= mLSW; 566 scodelem(cdb,e2,&rretregs,retregs,true); 567 } 568 getregs(cdb,rretregs | retregs); 569 const mreg = DX; 570 const lreg = findreglsw(retregs); 571 if (e.Eoper == OPmin) 572 { // negate retregs 573 cdb.gen2(0xF7,modregrm(3,3,mreg)); // NEG mreg 574 cdb.gen2(0xF7,modregrm(3,3,lreg)); // NEG lreg 575 code_orflag(cdb.last(),CFpsw); 576 cdb.genc2(0x81,modregrm(3,3,mreg),0); // SBB mreg,0 577 } 578 const lrreg = findreglsw(rretregs); 579 genregs(cdb,0x03,lreg,lrreg); // ADD lreg,lrreg 580 code_orflag(cdb.last(),CFpsw); 581 genmovreg(cdb,lrreg,CX); // MOV lrreg,CX 582 cdb.genc2(0x81,modregrm(3,2,mreg),0); // ADC mreg,0 583 genshift(cdb); // MOV CX,offset __AHSHIFT 584 cdb.gen2(0xD3,modregrm(3,4,mreg)); // SHL mreg,CL 
585 genregs(cdb,0x03,mreg,lrreg); // ADD mreg,MSREG(h) 586 fixresult(cdb,e,retregs,pretregs); 587 return; 588 } 589 590 regm_t rretregs; 591 reg_t reg; 592 if (_tysize[ty1] > REGSIZE && numwords == 1) 593 { /* The only possibilities are (TYfptr + tyword) or (TYfptr - tyword) */ 594 595 debug 596 if (_tysize[ty2] != REGSIZE) 597 { 598 printf("e = %p, e.Eoper = ",e); 599 WROP(e.Eoper); 600 printf(" e1.Ety = "); 601 WRTYxx(ty1); 602 printf(" e2.Ety = "); 603 WRTYxx(ty2); 604 printf("\n"); 605 elem_print(e); 606 } 607 608 assert(_tysize[ty2] == REGSIZE); 609 610 /* Watch out for the case here where you are going to OP reg,EA */ 611 /* and both the reg and EA use ES! Prevent this by forcing */ 612 /* reg into the regular registers. */ 613 if ((e2oper == OPind || 614 (e2oper == OPvar && el_fl(e2) == FLfardata)) && 615 !e2.Ecount) 616 { 617 retregs = ALLREGS; 618 } 619 620 codelem(cdb,e1,&retregs,test != 0); 621 reg = findreglsw(retregs); /* reg is the register with the offset*/ 622 } 623 else 624 { 625 regm_t regm; 626 627 /* if (tyword + TYfptr) */ 628 if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE) 629 { retregs = ~*pretregs & ALLREGS; 630 631 /* if retregs doesn't have any regs in it that aren't reg vars */ 632 if ((retregs & ~regcon.mvar) == 0) 633 retregs |= mAX; 634 } 635 else if (numwords == 2 && retregs & mES) 636 retregs = (retregs | mMSW) & ALLREGS; 637 638 // Determine if we should swap operands, because 639 // mov EAX,x 640 // add EAX,reg 641 // is faster than: 642 // mov EAX,reg 643 // add EAX,x 644 else if (e2oper == OPvar && 645 e1.Eoper == OPvar && 646 e.Eoper != OPmin && 647 isregvar(e1,®m,null) && 648 regm != retregs && 649 _tysize[ty1] == _tysize[ty2]) 650 { 651 elem *es = e1; 652 e1 = e2; 653 e2 = es; 654 } 655 codelem(cdb,e1,&retregs,test != 0); // eval left leaf 656 reg = findreg(retregs); 657 } 658 reg_t rreg; 659 int rval; 660 targ_size_t i; 661 switch (e2oper) 662 { 663 case OPind: /* if addressing mode */ 664 if (!e2.Ecount) /* if not CSE 
*/ 665 goto L1; /* try OP reg,EA */ 666 goto default; 667 668 default: /* operator node */ 669 L2: 670 rretregs = ALLREGS & ~retregs; 671 /* Be careful not to do arithmetic on ES */ 672 if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE && *pretregs != mPSW) 673 rretregs = *pretregs & (mES | ALLREGS | mBP) & ~retregs; 674 else if (isbyte) 675 rretregs &= BYTEREGS; 676 677 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 678 rreg = (_tysize[ty2] > REGSIZE) ? findreglsw(rretregs) : findreg(rretregs); 679 if (!test) 680 getregs(cdb,retregs); // we will trash these regs 681 if (numwords == 1) /* ADD reg,rreg */ 682 { 683 /* reverse operands to avoid moving around the segment value */ 684 if (_tysize[ty2] > REGSIZE) 685 { 686 getregs(cdb,rretregs); 687 genregs(cdb,op1,rreg,reg); 688 retregs = rretregs; // reverse operands 689 } 690 else 691 { 692 genregs(cdb,op1,reg,rreg); 693 if (!I16 && *pretregs & mPSW) 694 cdb.last().Iflags |= word; 695 } 696 if (I64 && sz == 8) 697 code_orrex(cdb.last(), REX_W); 698 if (I64 && isbyte && (reg >= 4 || rreg >= 4)) 699 code_orrex(cdb.last(), REX); 700 } 701 else /* numwords == 2 */ /* ADD lsreg,lsrreg */ 702 { 703 reg = findreglsw(retregs); 704 rreg = findreglsw(rretregs); 705 genregs(cdb,op1,reg,rreg); 706 if (e.Eoper == OPadd || e.Eoper == OPmin) 707 code_orflag(cdb.last(),CFpsw); 708 reg = findregmsw(retregs); 709 rreg = findregmsw(rretregs); 710 if (!(e2oper == OPu16_32 && // if second operand is 0 711 (op2 == 0x0B || op2 == 0x33)) // and OR or XOR 712 ) 713 genregs(cdb,op2,reg,rreg); // ADC msreg,msrreg 714 } 715 break; 716 717 case OPrelconst: 718 if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) 719 goto default; 720 if (sz != REGSIZE) 721 goto L2; 722 if (segfl[el_fl(e2)] != 3) /* if not in data segment */ 723 goto L2; 724 if (evalinregister(e2)) 725 goto L2; 726 cs.IEV2.Voffset = e2.EV.Voffset; 727 cs.IEV2.Vsym = e2.EV.Vsym; 728 cs.Iflags |= CFoff; 729 i = 0; /* no INC or DEC opcode */ 730 rval = 0; 731 
goto L3; 732 733 case OPconst: 734 if (tyfv(ty2)) 735 goto L2; 736 if (numwords == 1) 737 { 738 if (!el_signx32(e2)) 739 goto L2; 740 i = e2.EV.Vpointer; 741 if (word) 742 { 743 if (!(*pretregs & mPSW) && 744 config.flags4 & CFG4speed && 745 (e.Eoper == OPor || e.Eoper == OPxor || test || 746 (e1.Eoper != OPvar && e1.Eoper != OPind))) 747 { word = 0; 748 i &= 0xFFFF; 749 } 750 } 751 rval = reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,&rreg); 752 cs.IEV2.Vsize_t = i; 753 L3: 754 if (!test) 755 getregs(cdb,retregs); // we will trash these regs 756 op1 ^= isbyte; 757 cs.Iflags |= word; 758 if (rval) 759 { cs.Iop = op1 ^ 2; 760 mode = rreg; 761 } 762 else 763 cs.Iop = 0x81; 764 cs.Irm = modregrm(3,mode&7,reg&7); 765 if (mode & 8) 766 cs.Irex |= REX_R; 767 if (reg & 8) 768 cs.Irex |= REX_B; 769 if (I64 && sz == 8) 770 cs.Irex |= REX_W; 771 if (I64 && isbyte && (reg >= 4 || (rval && rreg >= 4))) 772 cs.Irex |= REX; 773 cs.IFL2 = cast(ubyte)((e2.Eoper == OPconst) ? FLconst : el_fl(e2)); 774 /* Modify instruction for special cases */ 775 switch (e.Eoper) 776 { 777 case OPadd: 778 { 779 int iop; 780 781 if (i == 1) 782 iop = 0; /* INC reg */ 783 else if (i == -1) 784 iop = 8; /* DEC reg */ 785 else 786 break; 787 cs.Iop = (0x40 | iop | reg) ^ isbyte; 788 if ((isbyte && *pretregs & mPSW) || I64) 789 { 790 cs.Irm = cast(ubyte)(modregrm(3,0,reg & 7) | iop); 791 cs.Iop = 0xFF; 792 } 793 break; 794 } 795 796 case OPand: 797 if (test) 798 cs.Iop = rval ? 
op1 : 0xF7; // TEST 799 break; 800 801 default: 802 break; 803 } 804 if (*pretregs & mPSW) 805 cs.Iflags |= CFpsw; 806 cs.Iop ^= isbyte; 807 cdb.gen(&cs); 808 cs.Iflags &= ~CFpsw; 809 } 810 else if (numwords == 2) 811 { 812 getregs(cdb,retregs); 813 reg = findregmsw(retregs); 814 const lsreg = findreglsw(retregs); 815 cs.Iop = 0x81; 816 cs.Irm = modregrm(3,mode,lsreg); 817 cs.IFL2 = FLconst; 818 const msw = cast(targ_int)MSREG(e2.EV.Vllong); 819 cs.IEV2.Vint = e2.EV.Vlong; 820 switch (e.Eoper) 821 { 822 case OPadd: 823 case OPmin: 824 cs.Iflags |= CFpsw; 825 break; 826 827 default: 828 break; 829 } 830 cdb.gen(&cs); 831 cs.Iflags &= ~CFpsw; 832 833 cs.Irm = cast(ubyte)((cs.Irm & modregrm(3,7,0)) | reg); 834 cs.IEV2.Vint = msw; 835 if (e.Eoper == OPadd) 836 cs.Irm |= modregrm(0,2,0); /* ADC */ 837 cdb.gen(&cs); 838 } 839 else 840 assert(0); 841 freenode(e2); 842 break; 843 844 case OPvar: 845 if (movOnly(e2)) 846 goto L2; 847 L1: 848 if (tyfv(ty2)) 849 goto L2; 850 if (!test) 851 getregs(cdb,retregs); // we will trash these regs 852 loadea(cdb,e2,&cs,op1, 853 ((numwords == 2) ? findreglsw(retregs) : reg), 854 0,retregs,retregs); 855 if (!I16 && word) 856 { if (*pretregs & mPSW) 857 code_orflag(cdb.last(),word); 858 else 859 cdb.last().Iflags &= ~cast(int)word; 860 } 861 else if (numwords == 2) 862 { 863 if (e.Eoper == OPadd || e.Eoper == OPmin) 864 code_orflag(cdb.last(),CFpsw); 865 reg = findregmsw(retregs); 866 if (!OTleaf(e2.Eoper)) 867 { getlvalue_msw(&cs); 868 cs.Iop = op2; 869 NEWREG(cs.Irm,reg); 870 cdb.gen(&cs); // ADC reg,data+2 871 } 872 else 873 loadea(cdb,e2,&cs,op2,reg,REGSIZE,retregs,0); 874 } 875 else if (I64 && sz == 8) 876 code_orrex(cdb.last(), REX_W); 877 freenode(e2); 878 break; 879 } 880 881 if (sz <= REGSIZE && *pretregs & mPSW) 882 { 883 /* If the expression is (_tls_array + ...), then the flags are not set 884 * since the linker may rewrite these instructions into something else. 
885 */ 886 if (I64 && e.Eoper == OPadd && e1.Eoper == OPvar) 887 { 888 const s = e1.EV.Vsym; 889 if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0) 890 { 891 goto L7; // don't assume flags are set 892 } 893 } 894 code_orflag(cdb.last(),CFpsw); 895 *pretregs &= ~mPSW; // flags already set 896 L7: { } 897 } 898 fixresult(cdb,e,retregs,pretregs); 899 } 900 901 902 /***************************** 903 * Handle multiply. 904 */ 905 906 void cdmul(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 907 { 908 //printf("cdmul()\n"); 909 elem *e1 = e.EV.E1; 910 elem *e2 = e.EV.E2; 911 if (*pretregs == 0) // if don't want result 912 { 913 codelem(cdb,e1,pretregs,false); // eval left leaf 914 *pretregs = 0; // in case they got set 915 codelem(cdb,e2,pretregs,false); 916 return; 917 } 918 919 //printf("cdmul(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 920 const tyml = tybasic(e1.Ety); 921 const ty = tybasic(e.Ety); 922 const oper = e.Eoper; 923 924 if (tyfloating(tyml)) 925 { 926 if (tyvector(tyml) || 927 config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) && 928 !(*pretregs & mST0) && 929 !(ty == TYldouble || ty == TYildouble) && // watch out for shrinkLongDoubleConstantIfPossible() 930 !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div 931 !(ty == TYllong) // or passing to function through integer register 932 ) 933 { 934 orthxmm(cdb,e,pretregs); 935 return; 936 } 937 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 938 orth87(cdb,e,pretregs); 939 else 940 opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv); 941 942 return; 943 } 944 945 if (tyxmmreg(tyml)) 946 { 947 orthxmm(cdb,e,pretregs); 948 return; 949 } 950 951 const uns = tyuns(tyml) || tyuns(e2.Ety); // 1 if signed operation, 0 if unsigned 952 const isbyte = tybyte(e.Ety) != 0; 953 const sz = _tysize[tyml]; 954 const ubyte rex = (I64 && sz == 8) ? 
REX_W : 0; 955 const uint grex = rex << 16; 956 const OPER opunslng = I16 ? OPu16_32 : OPu32_64; 957 958 code cs = void; 959 cs.Iflags = 0; 960 cs.Irex = 0; 961 962 switch (e2.Eoper) 963 { 964 case OPu16_32: 965 case OPs16_32: 966 case OPu32_64: 967 case OPs32_64: 968 { 969 if (sz != 2 * REGSIZE || e1.Eoper != e2.Eoper || 970 e1.Ecount || e2.Ecount) 971 goto default; 972 const ubyte opx = (e2.Eoper == opunslng) ? 4 : 5; 973 regm_t retregsx = mAX; 974 codelem(cdb,e1.EV.E1,&retregsx,false); // eval left leaf 975 if (e2.EV.E1.Eoper == OPvar || 976 (e2.EV.E1.Eoper == OPind && !e2.EV.E1.Ecount) 977 ) 978 { 979 loadea(cdb,e2.EV.E1,&cs,0xF7,opx,0,mAX,mAX | mDX); 980 } 981 else 982 { 983 regm_t rretregsx = ALLREGS & ~mAX; 984 scodelem(cdb,e2.EV.E1,&rretregsx,retregsx,true); // get rvalue 985 getregs(cdb,mAX | mDX); 986 const rregx = findreg(rretregsx); 987 cdb.gen2(0xF7,grex | modregrmx(3,opx,rregx)); // OP AX,rregx 988 } 989 freenode(e.EV.E1); 990 freenode(e2); 991 fixresult(cdb,e,mAX | mDX,pretregs); 992 return; 993 } 994 995 case OPconst: 996 const e2factor = cast(targ_size_t)el_tolong(e2); 997 998 // Multiply by a constant 999 if (I32 && sz == REGSIZE * 2) 1000 { 1001 /* if (msw) 1002 IMUL EDX,EDX,lsw 1003 IMUL reg,EAX,msw 1004 ADD reg,EDX 1005 else 1006 IMUL reg,EDX,lsw 1007 MOV EDX,lsw 1008 MUL EDX 1009 ADD EDX,reg 1010 */ 1011 regm_t retregs = mAX | mDX; 1012 codelem(cdb,e1,&retregs,false); // eval left leaf 1013 reg_t reg = allocScratchReg(cdb, allregs & ~(mAX | mDX)); 1014 getregs(cdb,mDX | mAX); 1015 1016 const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1)); 1017 const msw = cast(targ_int)(e2factor >> (REGSIZE * 8)); 1018 1019 if (msw) 1020 { 1021 genmulimm(cdb,DX,DX,lsw); // IMUL EDX,EDX,lsw 1022 genmulimm(cdb,reg,AX,msw); // IMUL reg,EAX,msw 1023 cdb.gen2(0x03,modregrm(3,reg,DX)); // ADD reg,EAX 1024 } 1025 else 1026 genmulimm(cdb,reg,DX,lsw); // IMUL reg,EDX,lsw 1027 1028 movregconst(cdb,DX,lsw,0); // MOV EDX,lsw 1029 getregs(cdb,mDX); 1030 
cdb.gen2(0xF7,modregrm(3,4,DX)); // MUL EDX 1031 cdb.gen2(0x03,modregrm(3,DX,reg)); // ADD EDX,reg 1032 1033 const resregx = mDX | mAX; 1034 freenode(e2); 1035 fixresult(cdb,e,resregx,pretregs); 1036 return; 1037 } 1038 1039 1040 const int pow2 = ispow2(e2factor); 1041 1042 if (sz > REGSIZE || !el_signx32(e2)) 1043 goto default; 1044 1045 if (config.target_cpu >= TARGET_80286) 1046 { 1047 if (I32 || I64) 1048 { 1049 // See if we can use an LEA instruction 1050 int ss; 1051 int ss2 = 0; 1052 int shift; 1053 1054 switch (e2factor) 1055 { 1056 case 12: ss = 1; ss2 = 2; goto L4; 1057 case 24: ss = 1; ss2 = 3; goto L4; 1058 1059 case 6: 1060 case 3: ss = 1; goto L4; 1061 1062 case 20: ss = 2; ss2 = 2; goto L4; 1063 case 40: ss = 2; ss2 = 3; goto L4; 1064 1065 case 10: 1066 case 5: ss = 2; goto L4; 1067 1068 case 36: ss = 3; ss2 = 2; goto L4; 1069 case 72: ss = 3; ss2 = 3; goto L4; 1070 1071 case 18: 1072 case 9: ss = 3; goto L4; 1073 1074 L4: 1075 { 1076 regm_t resreg = *pretregs & ALLREGS & ~(mBP | mR13); 1077 if (!resreg) 1078 resreg = isbyte ? BYTEREGS : ALLREGS & ~(mBP | mR13); 1079 1080 codelem(cdb,e.EV.E1,&resreg,false); 1081 getregs(cdb,resreg); 1082 reg_t reg = findreg(resreg); 1083 1084 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1085 modregxrmx(ss,reg,reg)); // LEA reg,[ss*reg][reg] 1086 assert((reg & 7) != BP); 1087 if (ss2) 1088 { 1089 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1090 modregxrm(ss2,reg,5)); 1091 cdb.last().IFL1 = FLconst; 1092 cdb.last().IEV1.Vint = 0; // LEA reg,0[ss2*reg] 1093 } 1094 else if (!(e2factor & 1)) // if even factor 1095 { 1096 genregs(cdb,0x03,reg,reg); // ADD reg,reg 1097 code_orrex(cdb.last(),rex); 1098 } 1099 freenode(e2); 1100 fixresult(cdb,e,resreg,pretregs); 1101 return; 1102 } 1103 case 37: 1104 case 74: shift = 2; 1105 goto L5; 1106 case 13: 1107 case 26: shift = 0; 1108 goto L5; 1109 L5: 1110 { 1111 regm_t retregs = isbyte ? 
BYTEREGS : ALLREGS; 1112 regm_t resreg = *pretregs & (ALLREGS | mBP); 1113 if (!resreg) 1114 resreg = retregs; 1115 1116 // Don't use EBP 1117 resreg &= ~(mBP | mR13); 1118 if (!resreg) 1119 resreg = retregs; 1120 reg_t reg; 1121 allocreg(cdb,&resreg,®,TYint); 1122 1123 regm_t sregm = (ALLREGS & ~mR13) & ~resreg; 1124 codelem(cdb,e.EV.E1,&sregm,false); 1125 uint sreg = findreg(sregm); 1126 getregs(cdb,resreg | sregm); 1127 assert((sreg & 7) != BP); 1128 assert((reg & 7) != BP); 1129 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1130 modregxrmx(2,sreg,sreg)); // LEA reg,[sreg*4][sreg] 1131 if (shift) 1132 cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift 1133 cdb.gen2sib(LEA,grex | modregxrm(0,reg,4), 1134 modregxrmx(3,sreg,reg)); // LEA reg,[sreg*8][reg] 1135 if (!(e2factor & 1)) // if even factor 1136 { 1137 genregs(cdb,0x03,reg,reg); // ADD reg,reg 1138 code_orrex(cdb.last(),rex); 1139 } 1140 freenode(e2); 1141 fixresult(cdb,e,resreg,pretregs); 1142 return; 1143 } 1144 1145 default: 1146 break; 1147 } 1148 } 1149 1150 regm_t retregs = isbyte ? BYTEREGS : ALLREGS; 1151 regm_t resreg = *pretregs & (ALLREGS | mBP); 1152 if (!resreg) 1153 resreg = retregs; 1154 1155 scodelem(cdb,e.EV.E1,&retregs,0,true); // eval left leaf 1156 const regx = findreg(retregs); 1157 reg_t rreg; 1158 allocreg(cdb,&resreg,&rreg,e.Ety); 1159 1160 // IMUL regx,imm16 1161 cdb.genc2(0x69,grex | modregxrmx(3,rreg,regx),e2factor); 1162 freenode(e2); 1163 fixresult(cdb,e,resreg,pretregs); 1164 return; 1165 } 1166 goto default; 1167 1168 case OPind: 1169 if (!e2.Ecount) // if not CSE 1170 goto case OPvar; // try OP reg,EA 1171 goto default; 1172 1173 default: // OPconst and operators 1174 //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg)); 1175 if (sz <= REGSIZE) 1176 { 1177 regm_t retregs = mAX; 1178 codelem(cdb,e1,&retregs,false); // eval left leaf 1179 regm_t rretregs = isbyte ? 
BYTEREGS & ~mAX 1180 : ALLREGS & ~(mAX|mDX); 1181 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 1182 getregs(cdb,mAX | mDX); // trash these regs 1183 reg_t rreg = findreg(rretregs); 1184 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,5 - uns,rreg)); // OP AX,rreg 1185 if (I64 && isbyte && rreg >= 4) 1186 code_orrex(cdb.last(), REX); 1187 fixresult(cdb,e,mAX,pretregs); 1188 return; 1189 } 1190 else if (sz == 2 * REGSIZE) 1191 { 1192 regm_t retregs = mDX | mAX; 1193 codelem(cdb,e1,&retregs,false); // eval left leaf 1194 if (config.target_cpu >= TARGET_PentiumPro) 1195 { 1196 regm_t rretregs = allregs & ~retregs; // second arg 1197 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 1198 regm_t rlo = findreglsw(rretregs); 1199 regm_t rhi = findregmsw(rretregs); 1200 /* IMUL rhi,EAX 1201 IMUL EDX,rlo 1202 ADD rhi,EDX 1203 MUL rlo 1204 ADD EDX,rhi 1205 */ 1206 getregs(cdb,mAX|mDX|mask(rhi)); 1207 cdb.gen2(0x0FAF,modregrm(3,rhi,AX)); 1208 cdb.gen2(0x0FAF,modregrm(3,DX,rlo)); 1209 cdb.gen2(0x03,modregrm(3,rhi,DX)); 1210 cdb.gen2(0xF7,modregrm(3,4,rlo)); 1211 cdb.gen2(0x03,modregrm(3,DX,rhi)); 1212 fixresult(cdb,e,mDX|mAX,pretregs); 1213 return; 1214 } 1215 else 1216 { 1217 regm_t rretregs = mCX | mBX; // second arg 1218 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 1219 callclib(cdb,e,CLIB.lmul,pretregs,0); 1220 return; 1221 } 1222 } 1223 assert(0); 1224 1225 case OPvar: 1226 if (!I16 && sz <= REGSIZE) 1227 { 1228 if (sz > 1) // no byte version 1229 { 1230 // Generate IMUL r32,r/m32 1231 regm_t retregs = *pretregs & (ALLREGS | mBP); 1232 if (!retregs) 1233 retregs = ALLREGS; 1234 codelem(cdb,e1,&retregs,false); // eval left leaf 1235 regm_t resreg = retregs; 1236 loadea(cdb,e2,&cs,0x0FAF,findreg(resreg),0,retregs,retregs); 1237 freenode(e2); 1238 fixresult(cdb,e,resreg,pretregs); 1239 return; 1240 } 1241 } 1242 else 1243 { 1244 if (sz == 2 * REGSIZE) 1245 { 1246 if (e.EV.E1.Eoper != opunslng || 1247 e1.Ecount) 1248 goto default; // have to handle it with 
codelem() 1249 1250 regm_t retregs = ALLREGS & ~(mAX | mDX); 1251 codelem(cdb,e1.EV.E1,&retregs,false); // eval left leaf 1252 const reg = findreg(retregs); 1253 getregs(cdb,mAX); 1254 genmovreg(cdb,AX,reg); // MOV AX,reg 1255 loadea(cdb,e2,&cs,0xF7,4,REGSIZE,mAX | mDX | mskl(reg),mAX | mDX); // MUL EA+2 1256 getregs(cdb,retregs); 1257 cdb.gen1(0x90 + reg); // XCHG AX,reg 1258 getregs(cdb,mAX | mDX); 1259 if ((cs.Irm & 0xC0) == 0xC0) // if EA is a register 1260 loadea(cdb,e2,&cs,0xF7,4,0,mAX | mskl(reg),mAX | mDX); // MUL EA 1261 else 1262 { getlvalue_lsw(&cs); 1263 cdb.gen(&cs); // MUL EA 1264 } 1265 cdb.gen2(0x03,modregrm(3,DX,reg)); // ADD DX,reg 1266 1267 freenode(e1); 1268 fixresult(cdb,e,mAX | mDX,pretregs); 1269 return; 1270 } 1271 assert(sz <= REGSIZE); 1272 } 1273 1274 // loadea() handles CWD or CLR DX for divides 1275 regm_t retregs = sz <= REGSIZE ? mAX : mDX|mAX; 1276 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1277 loadea(cdb,e2,&cs,0xF7 ^ isbyte,5 - uns,0, 1278 mAX, 1279 mAX | mDX); 1280 freenode(e2); 1281 fixresult(cdb,e,mAX,pretregs); 1282 return; 1283 } 1284 assert(0); 1285 } 1286 1287 1288 /***************************** 1289 * Handle divide, modulo and remquo. 1290 * Note that modulo isn't defined for doubles. 
1291 */ 1292 1293 void cddiv(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 1294 { 1295 //printf("cddiv()\n"); 1296 elem *e1 = e.EV.E1; 1297 elem *e2 = e.EV.E2; 1298 if (*pretregs == 0) // if don't want result 1299 { 1300 codelem(cdb,e1,pretregs,false); // eval left leaf 1301 *pretregs = 0; // in case they got set 1302 codelem(cdb,e2,pretregs,false); 1303 return; 1304 } 1305 1306 //printf("cddiv(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 1307 const tyml = tybasic(e1.Ety); 1308 const ty = tybasic(e.Ety); 1309 const oper = e.Eoper; 1310 1311 if (tyfloating(tyml)) 1312 { 1313 if (tyvector(tyml) || 1314 config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) && 1315 !(*pretregs & mST0) && 1316 !(ty == TYldouble || ty == TYildouble) && // watch out for shrinkLongDoubleConstantIfPossible() 1317 !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div 1318 !(ty == TYllong) // or passing to function through integer register 1319 ) 1320 { 1321 orthxmm(cdb,e,pretregs); 1322 return; 1323 } 1324 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 1325 orth87(cdb,e,pretregs); 1326 else 1327 opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv); 1328 1329 return; 1330 } 1331 1332 if (tyxmmreg(tyml)) 1333 { 1334 orthxmm(cdb,e,pretregs); 1335 return; 1336 } 1337 1338 const uns = tyuns(tyml) || tyuns(e2.Ety); // 1 if uint operation, 0 if not 1339 const isbyte = tybyte(e.Ety) != 0; 1340 const sz = _tysize[tyml]; 1341 const ubyte rex = (I64 && sz == 8) ? 
REX_W : 0; 1342 const uint grex = rex << 16; 1343 1344 code cs = void; 1345 cs.Iflags = 0; 1346 cs.Irex = 0; 1347 1348 switch (e2.Eoper) 1349 { 1350 case OPconst: 1351 auto d = cast(targ_size_t)el_tolong(e2); 1352 bool neg = false; 1353 const e2factor = d; 1354 if (!uns && cast(targ_llong)e2factor < 0) 1355 { neg = true; 1356 d = -d; 1357 } 1358 1359 // Signed divide by a constant 1360 if ((d & (d - 1)) && 1361 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) && 1362 config.flags4 & CFG4speed && !uns) 1363 { 1364 /* R1 / 10 1365 * 1366 * MOV EAX,m 1367 * IMUL R1 1368 * MOV EAX,R1 1369 * SAR EAX,31 1370 * SAR EDX,shpost 1371 * SUB EDX,EAX 1372 * IMUL EAX,EDX,d 1373 * SUB R1,EAX 1374 * 1375 * EDX = quotient 1376 * R1 = remainder 1377 */ 1378 assert(sz == 4 || sz == 8); 1379 1380 ulong m; 1381 int shpost; 1382 const int N = sz * 8; 1383 const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost); 1384 1385 regm_t regm = allregs & ~(mAX | mDX); 1386 codelem(cdb,e1,®m,false); // eval left leaf 1387 const reg_t reg = findreg(regm); 1388 getregs(cdb,regm | mDX | mAX); 1389 1390 /* Algorithm 5.2 1391 * if m>=2**(N-1) 1392 * q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n) 1393 * else 1394 * q = SRA(MULSH(m,n), shpost) - XSIGN(n) 1395 * if (neg) 1396 * q = -q 1397 */ 1398 const bool mgt = mhighbit || m >= (1UL << (N - 1)); 1399 movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 
0x40 : 0); // MOV EAX,m 1400 cdb.gen2(0xF7,grex | modregrmx(3,5,reg)); // IMUL R1 1401 if (mgt) 1402 cdb.gen2(0x03,grex | modregrmx(3,DX,reg)); // ADD EDX,R1 1403 getregsNoSave(mAX); // EAX no longer contains 'm' 1404 genmovreg(cdb, AX, reg); // MOV EAX,R1 1405 cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1); // SAR EAX,31 1406 if (shpost) 1407 cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost); // SAR EDX,shpost 1408 reg_t r3; 1409 if (neg && oper == OPdiv) 1410 { 1411 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB EAX,EDX 1412 r3 = AX; 1413 } 1414 else 1415 { 1416 cdb.gen2(0x2B,grex | modregrm(3,DX,AX)); // SUB EDX,EAX 1417 r3 = DX; 1418 } 1419 1420 // r3 is quotient 1421 regm_t resregx; 1422 switch (oper) 1423 { case OPdiv: 1424 resregx = mask(r3); 1425 break; 1426 1427 case OPmod: 1428 assert(reg != AX && r3 == DX); 1429 if (sz == 4 || (sz == 8 && cast(targ_long)d == d)) 1430 { 1431 cdb.genc2(0x69,grex | modregrm(3,AX,DX),d); // IMUL EAX,EDX,d 1432 } 1433 else 1434 { 1435 movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d 1436 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX)); // IMUL EAX,EDX 1437 getregsNoSave(mAX); // EAX no longer contains 'd' 1438 } 1439 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB R1,EAX 1440 resregx = regm; 1441 break; 1442 1443 case OPremquo: 1444 assert(reg != AX && r3 == DX); 1445 if (sz == 4 || (sz == 8 && cast(targ_long)d == d)) 1446 { 1447 cdb.genc2(0x69,grex | modregrm(3,AX,DX),d); // IMUL EAX,EDX,d 1448 } 1449 else 1450 { 1451 movregconst(cdb,AX,d,(sz == 8) ? 
0x40 : 0); // MOV EAX,d 1452 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX)); // IMUL EAX,EDX 1453 } 1454 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB R1,EAX 1455 genmovreg(cdb, AX, r3); // MOV EAX,r3 1456 if (neg) 1457 cdb.gen2(0xF7,grex | modregrm(3,3,AX)); // NEG EAX 1458 genmovreg(cdb, DX, reg); // MOV EDX,R1 1459 resregx = mDX | mAX; 1460 break; 1461 1462 default: 1463 assert(0); 1464 } 1465 freenode(e2); 1466 fixresult(cdb,e,resregx,pretregs); 1467 return; 1468 } 1469 1470 // Unsigned divide by a constant 1471 if (e2factor > 2 && (e2factor & (e2factor - 1)) && 1472 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) && 1473 config.flags4 & CFG4speed && uns) 1474 { 1475 assert(sz == 4 || sz == 8); 1476 1477 reg_t r3; 1478 regm_t regm; 1479 reg_t reg; 1480 ulong m; 1481 int shpre; 1482 int shpost; 1483 if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost)) 1484 { 1485 /* t1 = MULUH(m, n) 1486 * q = SRL(t1 + SRL(n - t1, 1), shpost - 1) 1487 * MOV EAX,reg 1488 * MOV EDX,m 1489 * MUL EDX 1490 * MOV EAX,reg 1491 * SUB EAX,EDX 1492 * SHR EAX,1 1493 * LEA R3,[EAX][EDX] 1494 * SHR R3,shpost-1 1495 */ 1496 assert(shpre == 0); 1497 1498 regm = allregs & ~(mAX | mDX); 1499 codelem(cdb,e1,®m,false); // eval left leaf 1500 reg = findreg(regm); 1501 getregs(cdb,mAX | mDX); 1502 genmovreg(cdb,AX,reg); // MOV EAX,reg 1503 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 
0x40 : 0); // MOV EDX,m 1504 getregs(cdb,regm | mDX | mAX); 1505 cdb.gen2(0xF7,grex | modregrmx(3,4,DX)); // MUL EDX 1506 genmovreg(cdb,AX,reg); // MOV EAX,reg 1507 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB EAX,EDX 1508 cdb.genc2(0xC1,grex | modregrm(3,5,AX),1); // SHR EAX,1 1509 regm_t regm3 = allregs; 1510 if (oper == OPmod || oper == OPremquo) 1511 { 1512 regm3 &= ~regm; 1513 if (oper == OPremquo || !el_signx32(e2)) 1514 regm3 &= ~mAX; 1515 } 1516 allocreg(cdb,®m3,&r3,TYint); 1517 cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX] 1518 if (shpost != 1) 1519 cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1); // SHR R3,shpost-1 1520 } 1521 else 1522 { 1523 /* q = SRL(MULUH(m, SRL(n, shpre)), shpost) 1524 * SHR EAX,shpre 1525 * MOV reg,m 1526 * MUL reg 1527 * SHR EDX,shpost 1528 */ 1529 regm = mAX; 1530 if (oper == OPmod || oper == OPremquo) 1531 regm = allregs & ~(mAX|mDX); 1532 codelem(cdb,e1,®m,false); // eval left leaf 1533 reg = findreg(regm); 1534 1535 if (reg != AX) 1536 { 1537 getregs(cdb,mAX); 1538 genmovreg(cdb,AX,reg); // MOV EAX,reg 1539 } 1540 if (shpre) 1541 { 1542 getregs(cdb,mAX); 1543 cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre); // SHR EAX,shpre 1544 } 1545 getregs(cdb,mDX); 1546 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0); // MOV EDX,m 1547 getregs(cdb,mDX | mAX); 1548 cdb.gen2(0xF7,grex | modregrmx(3,4,DX)); // MUL EDX 1549 if (shpost) 1550 cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost); // SHR EDX,shpost 1551 r3 = DX; 1552 } 1553 1554 regm_t resreg; 1555 switch (oper) 1556 { case OPdiv: 1557 // r3 = quotient 1558 resreg = mask(r3); 1559 break; 1560 1561 case OPmod: 1562 /* reg = original value 1563 * r3 = quotient 1564 */ 1565 assert(!(regm & mAX)); 1566 if (el_signx32(e2)) 1567 { 1568 cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor 1569 } 1570 else 1571 { 1572 assert(!(mask(r3) & mAX)); 1573 movregconst(cdb,AX,e2factor,(sz == 8) ? 
0x40 : 0); // MOV EAX,e2factor 1574 getregs(cdb,mAX); 1575 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3)); // IMUL EAX,r3 1576 } 1577 getregs(cdb,regm); 1578 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB reg,EAX 1579 resreg = regm; 1580 break; 1581 1582 case OPremquo: 1583 /* reg = original value 1584 * r3 = quotient 1585 */ 1586 assert(!(mask(r3) & (mAX|regm))); 1587 assert(!(regm & mAX)); 1588 if (el_signx32(e2)) 1589 { 1590 cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor 1591 } 1592 else 1593 { 1594 movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0); // MOV EAX,e2factor 1595 getregs(cdb,mAX); 1596 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3)); // IMUL EAX,r3 1597 } 1598 getregs(cdb,regm); 1599 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB reg,EAX 1600 genmovreg(cdb, AX, r3); // MOV EAX,r3 1601 genmovreg(cdb, DX, reg); // MOV EDX,reg 1602 resreg = mDX | mAX; 1603 break; 1604 1605 default: 1606 assert(0); 1607 } 1608 freenode(e2); 1609 fixresult(cdb,e,resreg,pretregs); 1610 return; 1611 } 1612 1613 const int pow2 = ispow2(e2factor); 1614 1615 // Register pair signed divide by power of 2 1616 if (sz == REGSIZE * 2 && 1617 (oper == OPdiv) && !uns && 1618 pow2 != -1 && 1619 I32 // not set up for I64 cent yet 1620 ) 1621 { 1622 regm_t retregs = mDX | mAX; 1623 if (pow2 == 63 && !(retregs & BYTEREGS & mLSW)) 1624 retregs = (retregs & mMSW) | (BYTEREGS & mLSW); // because of SETZ 1625 1626 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1627 const rhi = findregmsw(retregs); 1628 const rlo = findreglsw(retregs); 1629 freenode(e2); 1630 getregs(cdb,retregs); 1631 1632 if (pow2 < 32) 1633 { 1634 reg_t r1 = allocScratchReg(cdb, allregs & ~retregs); 1635 1636 genmovreg(cdb,r1,rhi); // MOV r1,rhi 1637 if (pow2 == 1) 1638 cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR r1,31 1639 else 1640 { 1641 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1642 cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 
1); // AND r1,mask 1643 } 1644 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1)); // ADD rlo,r1 1645 cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0); // ADC rhi,0 1646 cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2); // SHRD rlo,rhi,pow2 1647 cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2); // SAR rhi,pow2 1648 } 1649 else if (pow2 == 32) 1650 { 1651 reg_t r1 = allocScratchReg(cdb, allregs & ~retregs); 1652 1653 genmovreg(cdb,r1,rhi); // MOV r1,rhi 1654 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1655 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1)); // ADD rlo,r1 1656 cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0); // ADC rhi,0 1657 cdb.genmovreg(rlo,rhi); // MOV rlo,rhi 1658 cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1); // SAR rhi,31 1659 } 1660 else if (pow2 < 63) 1661 { 1662 reg_t r1 = allocScratchReg(cdb, allregs & ~retregs); 1663 reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1))); 1664 1665 genmovreg(cdb,r1,rhi); // MOV r1,rhi 1666 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1667 cdb.genmovreg(r2,r1); // MOV r2,r1 1668 1669 if (pow2 == 33) 1670 { 1671 cdb.gen2(0xF7,modregrmx(3,3,r1)); // NEG r1 1672 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2)); // ADD rlo,r2 1673 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1)); // ADC rhi,r1 1674 } 1675 else 1676 { 1677 cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask 1678 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1)); // ADD rlo,r1 1679 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2)); // ADC rhi,r2 1680 } 1681 1682 cdb.genmovreg(rlo,rhi); // MOV rlo,rhi 1683 cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32); // SAR rlo,pow2-32 1684 cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1); // SAR rhi,31 1685 } 1686 else 1687 { 1688 // This may be better done by cgelem.d 1689 assert(pow2 == 63); 1690 cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_000 1691 cdb.genregs(0x09,rlo,rhi); // OR rlo,rhi 1692 
cdb.gen2(0x0F94,modregrmx(3,0,rlo)); // SETZ rlo 1693 cdb.genregs(MOVZXb,rlo,rlo); // MOVZX rlo,rloL 1694 movregconst(cdb,rhi,0,0); // MOV rhi,0 1695 } 1696 1697 fixresult(cdb,e,retregs,pretregs); 1698 return; 1699 } 1700 1701 // Register pair signed modulo by power of 2 1702 if (sz == REGSIZE * 2 && 1703 (oper == OPmod) && !uns && 1704 pow2 != -1 && 1705 I32 // not set up for I64 cent yet 1706 ) 1707 { 1708 regm_t retregs = mDX | mAX; 1709 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1710 const rhi = findregmsw(retregs); 1711 const rlo = findreglsw(retregs); 1712 freenode(e2); 1713 getregs(cdb,retregs); 1714 1715 regm_t scratchm = allregs & ~retregs; 1716 if (pow2 == 63) 1717 scratchm &= BYTEREGS; // because of SETZ 1718 reg_t r1 = allocScratchReg(cdb, scratchm); 1719 1720 if (pow2 < 32) 1721 { 1722 cdb.genmovreg(r1,rhi); // MOV r1,rhi 1723 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1724 cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1)); // XOR rlo,r1 1725 cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1)); // SUB rlo,r1 1726 cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1); // AND rlo,(1<<pow2)-1 1727 cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1)); // XOR rlo,r1 1728 cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1)); // SUB rlo,r1 1729 cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi)); // SBB rhi,rhi 1730 } 1731 else if (pow2 == 32) 1732 { 1733 cdb.genmovreg(r1,rhi); // MOV r1,rhi 1734 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1735 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1)); // ADD rlo,r1 1736 cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1)); // SUB rlo,r1 1737 cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi)); // SBB rhi,rhi 1738 } 1739 else if (pow2 < 63) 1740 { 1741 reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1))); 1742 1743 cdb.genmovreg(r1,rhi); // MOV r1,rhi 1744 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1745 cdb.genmovreg(r2,r1); // MOV r2,r1 1746 cdb.genc2(0x0FAC,grex | 
modregrm(3,r2,r1),64-pow2); // SHRD r1,r2,64-pow2 1747 cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2); // SHR r2,64-pow2 1748 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1)); // ADD rlo,r1 1749 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2)); // ADC rhi,r2 1750 cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND rhi,(1<<(pow2-32))-1 1751 cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1)); // SUB rlo,r1 1752 cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2)); // SBB rhi,r2 1753 } 1754 else 1755 { 1756 // This may be better done by cgelem.d 1757 assert(pow2 == 63); 1758 1759 cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi] 1760 cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo)); // OR r1,rlo 1761 cdb.gen2(0x0F94,modregrmx(3,0,r1)); // SETZ r1 1762 cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL r1,31 1763 cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1)); // SUB rhi,r1 1764 } 1765 1766 fixresult(cdb,e,retregs,pretregs); 1767 return; 1768 } 1769 1770 if (sz > REGSIZE || !el_signx32(e2)) 1771 goto default; 1772 1773 // Special code for signed divide or modulo by power of 2 1774 if ((sz == REGSIZE || (I64 && sz == 4)) && 1775 (oper == OPdiv || oper == OPmod) && !uns && 1776 pow2 != -1 && 1777 !(config.target_cpu < TARGET_80286 && pow2 != 1 && oper == OPdiv) 1778 ) 1779 { 1780 if (pow2 == 1 && oper == OPdiv && config.target_cpu > TARGET_80386) 1781 { 1782 /* MOV r,reg 1783 SHR r,31 1784 ADD reg,r 1785 SAR reg,1 1786 */ 1787 regm_t retregs = allregs; 1788 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1789 const reg = findreg(retregs); 1790 freenode(e2); 1791 getregs(cdb,retregs); 1792 1793 reg_t r = allocScratchReg(cdb, allregs & ~retregs); 1794 genmovreg(cdb,r,reg); // MOV r,reg 1795 cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31 1796 cdb.gen2(0x03,grex | modregxrmx(3,reg,r)); // ADD reg,r 1797 cdb.gen2(0xD1,grex | modregrmx(3,7,reg)); // SAR reg,1 1798 regm_t resreg = retregs; 1799 
fixresult(cdb,e,resreg,pretregs); 1800 return; 1801 } 1802 1803 regm_t resreg; 1804 switch (oper) 1805 { 1806 case OPdiv: 1807 resreg = mAX; 1808 break; 1809 1810 case OPmod: 1811 resreg = mDX; 1812 break; 1813 1814 case OPremquo: 1815 resreg = mDX | mAX; 1816 break; 1817 1818 default: 1819 assert(0); 1820 } 1821 1822 regm_t retregs = mAX; 1823 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1824 freenode(e2); 1825 getregs(cdb,mAX | mDX); // modify these regs 1826 cdb.gen1(0x99); // CWD 1827 code_orrex(cdb.last(), rex); 1828 if (pow2 == 1) 1829 { 1830 if (oper == OPdiv) 1831 { 1832 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1833 cdb.gen2(0xD1,grex | modregrm(3,7,AX)); // SAR AX,1 1834 } 1835 else // OPmod 1836 { 1837 cdb.gen2(0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1838 cdb.genc2(0x81,grex | modregrm(3,4,AX),1); // AND AX,1 1839 cdb.gen2(0x03,grex | modregrm(3,DX,AX)); // ADD DX,AX 1840 } 1841 } 1842 else 1843 { targ_ulong m; 1844 1845 m = (1 << pow2) - 1; 1846 if (oper == OPdiv) 1847 { 1848 cdb.genc2(0x81,grex | modregrm(3,4,DX),m); // AND DX,m 1849 cdb.gen2(0x03,grex | modregrm(3,AX,DX)); // ADD AX,DX 1850 // Be careful not to generate this for 8088 1851 assert(config.target_cpu >= TARGET_80286); 1852 cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2 1853 } 1854 else // OPmod 1855 { 1856 cdb.gen2(0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1857 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1858 cdb.genc2(0x81,grex | modregrm(3,4,AX),m); // AND AX,mask 1859 cdb.gen2(0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1860 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1861 resreg = mAX; 1862 } 1863 } 1864 fixresult(cdb,e,resreg,pretregs); 1865 return; 1866 } 1867 goto default; 1868 1869 case OPind: 1870 if (!e2.Ecount) // if not CSE 1871 goto case OPvar; // try OP reg,EA 1872 goto default; 1873 1874 default: // OPconst and operators 1875 //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), 
regm_str(rretregs), regm_str(resreg)); 1876 regm_t retregs = sz <= REGSIZE ? mAX : mDX | mAX; 1877 codelem(cdb,e1,&retregs,false); // eval left leaf 1878 regm_t rretregs; 1879 if (sz <= REGSIZE) // dedicated regs for div 1880 { 1881 // pick some other regs 1882 rretregs = isbyte ? BYTEREGS & ~mAX 1883 : ALLREGS & ~(mAX|mDX); 1884 } 1885 else 1886 { 1887 assert(sz <= 2 * REGSIZE); 1888 rretregs = mCX | mBX; // second arg 1889 } 1890 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 1891 if (sz <= REGSIZE) 1892 { 1893 getregs(cdb,mAX | mDX); // trash these regs 1894 if (uns) // unsigned divide 1895 { 1896 movregconst(cdb,DX,0,(sz == 8) ? 64 : 0); // MOV DX,0 1897 getregs(cdb,mDX); 1898 } 1899 else 1900 { 1901 cdb.gen1(0x99); // CWD 1902 code_orrex(cdb.last(),rex); 1903 } 1904 reg_t rreg = findreg(rretregs); 1905 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,7 - uns,rreg)); // OP AX,rreg 1906 if (I64 && isbyte && rreg >= 4) 1907 code_orrex(cdb.last(), REX); 1908 regm_t resreg; 1909 switch (oper) 1910 { 1911 case OPdiv: 1912 resreg = mAX; 1913 break; 1914 1915 case OPmod: 1916 resreg = mDX; 1917 break; 1918 1919 case OPremquo: 1920 resreg = mDX | mAX; 1921 break; 1922 1923 default: 1924 assert(0); 1925 } 1926 fixresult(cdb,e,resreg,pretregs); 1927 } 1928 else if (sz == 2 * REGSIZE) 1929 { 1930 uint lib; 1931 switch (oper) 1932 { 1933 case OPdiv: 1934 case OPremquo: 1935 lib = uns ? CLIB.uldiv : CLIB.ldiv; 1936 break; 1937 1938 case OPmod: 1939 lib = uns ? CLIB.ulmod : CLIB.lmod; 1940 break; 1941 1942 default: 1943 assert(0); 1944 } 1945 1946 regm_t keepregs = I32 ? 
mSI | mDI : 0; 1947 callclib(cdb,e,lib,pretregs,keepregs); 1948 } 1949 else 1950 assert(0); 1951 return; 1952 1953 case OPvar: 1954 if (I16 || sz == 2 * REGSIZE) 1955 goto default; // have to handle it with codelem() 1956 1957 // loadea() handles CWD or CLR DX for divides 1958 regm_t retregs = mAX; 1959 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1960 loadea(cdb,e2,&cs,0xF7 ^ isbyte,7 - uns,0, 1961 mAX | mDX, 1962 mAX | mDX); 1963 freenode(e2); 1964 regm_t resreg; 1965 switch (oper) 1966 { 1967 case OPdiv: 1968 resreg = mAX; 1969 break; 1970 1971 case OPmod: 1972 resreg = mDX; 1973 break; 1974 1975 case OPremquo: 1976 resreg = mDX | mAX; 1977 break; 1978 1979 default: 1980 assert(0); 1981 } 1982 fixresult(cdb,e,resreg,pretregs); 1983 return; 1984 } 1985 assert(0); 1986 } 1987 1988 1989 /*************************** 1990 * Handle OPnot and OPbool. 1991 * Generate: 1992 * c: [evaluate e1] 1993 * cfalse: [save reg code] 1994 * clr reg 1995 * jmp cnop 1996 * ctrue: [save reg code] 1997 * clr reg 1998 * inc reg 1999 * cnop: nop 2000 */ 2001 2002 void cdnot(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2003 { 2004 //printf("cdnot()\n"); 2005 reg_t reg; 2006 tym_t forflags; 2007 regm_t retregs; 2008 elem *e1 = e.EV.E1; 2009 2010 if (*pretregs == 0) 2011 goto L1; 2012 if (*pretregs == mPSW) 2013 { //assert(e.Eoper != OPnot && e.Eoper != OPbool);*/ /* should've been optimized 2014 L1: 2015 codelem(cdb,e1,pretregs,false); // evaluate e1 for cc 2016 return; 2017 } 2018 2019 OPER op = e.Eoper; 2020 uint sz = tysize(e1.Ety); 2021 uint rex = (I64 && sz == 8) ? REX_W : 0; 2022 uint grex = rex << 16; 2023 2024 if (!tyfloating(e1.Ety)) 2025 { 2026 if (sz <= REGSIZE && e1.Eoper == OPvar) 2027 { code cs; 2028 2029 getlvalue(cdb,&cs,e1,0); 2030 freenode(e1); 2031 if (!I16 && sz == 2) 2032 cs.Iflags |= CFopsize; 2033 2034 retregs = *pretregs & (ALLREGS | mBP); 2035 if (config.target_cpu >= TARGET_80486 && 2036 tysize(e.Ety) == 1) 2037 { 2038 if (reghasvalue((sz == 1) ? 
BYTEREGS : ALLREGS,0,®)) 2039 { 2040 cs.Iop = 0x39; 2041 if (I64 && (sz == 1) && reg >= 4) 2042 cs.Irex |= REX; 2043 } 2044 else 2045 { cs.Iop = 0x81; 2046 reg = 7; 2047 cs.IFL2 = FLconst; 2048 cs.IEV2.Vint = 0; 2049 } 2050 cs.Iop ^= (sz == 1); 2051 code_newreg(&cs,reg); 2052 cdb.gen(&cs); // CMP e1,0 2053 2054 retregs &= BYTEREGS; 2055 if (!retregs) 2056 retregs = BYTEREGS; 2057 allocreg(cdb,&retregs,®,TYint); 2058 2059 const opcode_t iop = (op == OPbool) 2060 ? 0x0F95 // SETNZ rm8 2061 : 0x0F94; // SETZ rm8 2062 cdb.gen2(iop, modregrmx(3,0,reg)); 2063 if (reg >= 4) 2064 code_orrex(cdb.last(), REX); 2065 if (op == OPbool) 2066 *pretregs &= ~mPSW; 2067 goto L4; 2068 } 2069 2070 if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,1,®)) 2071 cs.Iop = 0x39; 2072 else 2073 { cs.Iop = 0x81; 2074 reg = 7; 2075 cs.IFL2 = FLconst; 2076 cs.IEV2.Vint = 1; 2077 } 2078 if (I64 && (sz == 1) && reg >= 4) 2079 cs.Irex |= REX; 2080 cs.Iop ^= (sz == 1); 2081 code_newreg(&cs,reg); 2082 cdb.gen(&cs); // CMP e1,1 2083 2084 allocreg(cdb,&retregs,®,TYint); 2085 op ^= (OPbool ^ OPnot); // switch operators 2086 goto L2; 2087 } 2088 else if (config.target_cpu >= TARGET_80486 && 2089 tysize(e.Ety) == 1) 2090 { 2091 int jop = jmpopcode(e.EV.E1); 2092 retregs = mPSW; 2093 codelem(cdb,e.EV.E1,&retregs,false); 2094 retregs = *pretregs & BYTEREGS; 2095 if (!retregs) 2096 retregs = BYTEREGS; 2097 allocreg(cdb,&retregs,®,TYint); 2098 2099 int iop = 0x0F90 | (jop & 0x0F); // SETcc rm8 2100 if (op == OPnot) 2101 iop ^= 1; 2102 cdb.gen2(iop,grex | modregrmx(3,0,reg)); 2103 if (reg >= 4) 2104 code_orrex(cdb.last(), REX); 2105 if (op == OPbool) 2106 *pretregs &= ~mPSW; 2107 goto L4; 2108 } 2109 else if (sz <= REGSIZE && 2110 // NEG bytereg is too expensive 2111 (sz != 1 || config.target_cpu < TARGET_PentiumPro)) 2112 { 2113 retregs = *pretregs & (ALLREGS | mBP); 2114 if (sz == 1 && !(retregs &= BYTEREGS)) 2115 retregs = BYTEREGS; 2116 codelem(cdb,e.EV.E1,&retregs,false); 2117 reg = findreg(retregs); 2118 
getregs(cdb,retregs); 2119 cdb.gen2(sz == 1 ? 0xF6 : 0xF7,grex | modregrmx(3,3,reg)); // NEG reg 2120 code_orflag(cdb.last(),CFpsw); 2121 if (!I16 && sz == SHORTSIZE) 2122 code_orflag(cdb.last(),CFopsize); 2123 L2: 2124 genregs(cdb,0x19,reg,reg); // SBB reg,reg 2125 code_orrex(cdb.last(), rex); 2126 // At this point, reg==0 if e1==0, reg==-1 if e1!=0 2127 if (op == OPnot) 2128 { 2129 if (I64) 2130 cdb.gen2(0xFF,grex | modregrmx(3,0,reg)); // INC reg 2131 else 2132 cdb.gen1(0x40 + reg); // INC reg 2133 } 2134 else 2135 cdb.gen2(0xF7,grex | modregrmx(3,3,reg)); // NEG reg 2136 if (*pretregs & mPSW) 2137 { code_orflag(cdb.last(),CFpsw); 2138 *pretregs &= ~mPSW; // flags are always set anyway 2139 } 2140 L4: 2141 fixresult(cdb,e,retregs,pretregs); 2142 return; 2143 } 2144 } 2145 code *cnop = gennop(null); 2146 code *ctrue = gennop(null); 2147 logexp(cdb,e.EV.E1,(op == OPnot) ? false : true,FLcode,ctrue); 2148 forflags = *pretregs & mPSW; 2149 if (I64 && sz == 8) 2150 forflags |= 64; 2151 assert(tysize(e.Ety) <= REGSIZE); // result better be int 2152 CodeBuilder cdbfalse; 2153 cdbfalse.ctor(); 2154 allocreg(cdbfalse,pretregs,®,e.Ety); // allocate reg for result 2155 code *cfalse = cdbfalse.finish(); 2156 CodeBuilder cdbtrue; 2157 cdbtrue.ctor(); 2158 cdbtrue.append(ctrue); 2159 for (code *c1 = cfalse; c1; c1 = code_next(c1)) 2160 cdbtrue.gen(c1); // duplicate reg save code 2161 CodeBuilder cdbfalse2; 2162 cdbfalse2.ctor(); 2163 movregconst(cdbfalse2,reg,0,forflags); // mov 0 into reg 2164 regcon.immed.mval &= ~mask(reg); // mark reg as unavail 2165 movregconst(cdbtrue,reg,1,forflags); // mov 1 into reg 2166 regcon.immed.mval &= ~mask(reg); // mark reg as unavail 2167 genjmp(cdbfalse2,JMP,FLcode,cast(block *) cnop); // skip over ctrue 2168 cdb.append(cfalse); 2169 cdb.append(cdbfalse2); 2170 cdb.append(cdbtrue); 2171 cdb.append(cnop); 2172 } 2173 2174 2175 /************************ 2176 * Complement operator 2177 */ 2178 2179 void cdcom(ref CodeBuilder cdb,elem *e,regm_t 
*pretregs) 2180 { 2181 if (*pretregs == 0) 2182 { 2183 codelem(cdb,e.EV.E1,pretregs,false); 2184 return; 2185 } 2186 tym_t tym = tybasic(e.Ety); 2187 int sz = _tysize[tym]; 2188 uint rex = (I64 && sz == 8) ? REX_W : 0; 2189 regm_t possregs = (sz == 1) ? BYTEREGS : allregs; 2190 regm_t retregs = *pretregs & possregs; 2191 if (retregs == 0) 2192 retregs = possregs; 2193 codelem(cdb,e.EV.E1,&retregs,false); 2194 getregs(cdb,retregs); // retregs will be destroyed 2195 2196 if (0 && sz == 4 * REGSIZE) 2197 { 2198 cdb.gen2(0xF7,modregrm(3,2,AX)); // NOT AX 2199 cdb.gen2(0xF7,modregrm(3,2,BX)); // NOT BX 2200 cdb.gen2(0xF7,modregrm(3,2,CX)); // NOT CX 2201 cdb.gen2(0xF7,modregrm(3,2,DX)); // NOT DX 2202 } 2203 else 2204 { 2205 const reg = (sz <= REGSIZE) ? findreg(retregs) : findregmsw(retregs); 2206 const op = (sz == 1) ? 0xF6 : 0xF7; 2207 genregs(cdb,op,2,reg); // NOT reg 2208 code_orrex(cdb.last(), rex); 2209 if (I64 && sz == 1 && reg >= 4) 2210 code_orrex(cdb.last(), REX); 2211 if (sz == 2 * REGSIZE) 2212 { 2213 const reg2 = findreglsw(retregs); 2214 genregs(cdb,op,2,reg2); // NOT reg+1 2215 } 2216 } 2217 fixresult(cdb,e,retregs,pretregs); 2218 } 2219 2220 /************************ 2221 * Bswap operator 2222 */ 2223 2224 void cdbswap(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2225 { 2226 if (*pretregs == 0) 2227 { 2228 codelem(cdb,e.EV.E1,pretregs,false); 2229 return; 2230 } 2231 2232 const tym = tybasic(e.Ety); 2233 const sz = _tysize[tym]; 2234 const posregs = (sz == 2) ? 
BYTEREGS : allregs; 2235 regm_t retregs = *pretregs & posregs; 2236 if (retregs == 0) 2237 retregs = posregs; 2238 codelem(cdb,e.EV.E1,&retregs,false); 2239 getregs(cdb,retregs); // retregs will be destroyed 2240 if (sz == 2 * REGSIZE) 2241 { 2242 assert(sz != 16); // no cent support yet 2243 const msreg = findregmsw(retregs); 2244 cdb.gen1(0x0FC8 + (msreg & 7)); // BSWAP msreg 2245 const lsreg = findreglsw(retregs); 2246 cdb.gen1(0x0FC8 + (lsreg & 7)); // BSWAP lsreg 2247 cdb.gen2(0x87,modregrm(3,msreg,lsreg)); // XCHG msreg,lsreg 2248 } 2249 else 2250 { 2251 const reg = findreg(retregs); 2252 if (sz == 2) 2253 { 2254 genregs(cdb,0x86,reg+4,reg); // XCHG regL,regH 2255 } 2256 else 2257 { 2258 assert(sz == 4 || sz == 8); 2259 cdb.gen1(0x0FC8 + (reg & 7)); // BSWAP reg 2260 ubyte rex = 0; 2261 if (sz == 8) 2262 rex |= REX_W; 2263 if (reg & 8) 2264 rex |= REX_B; 2265 if (rex) 2266 code_orrex(cdb.last(), rex); 2267 } 2268 } 2269 fixresult(cdb,e,retregs,pretregs); 2270 } 2271 2272 /************************* 2273 * ?: operator 2274 */ 2275 2276 void cdcond(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2277 { 2278 con_t regconold,regconsave; 2279 uint stackpushold,stackpushsave; 2280 int ehindexold,ehindexsave; 2281 uint sz2; 2282 2283 /* vars to save state of 8087 */ 2284 int stackusedold,stackusedsave; 2285 NDP[global87.stack.length] _8087old; 2286 NDP[global87.stack.length] _8087save; 2287 2288 //printf("cdcond(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); 2289 elem *e1 = e.EV.E1; 2290 elem *e2 = e.EV.E2; 2291 elem *e21 = e2.EV.E1; 2292 elem *e22 = e2.EV.E2; 2293 regm_t psw = *pretregs & mPSW; /* save PSW bit */ 2294 const op1 = e1.Eoper; 2295 uint sz1 = tysize(e1.Ety); 2296 uint jop = jmpopcode(e1); 2297 2298 uint jop1 = jmpopcode(e21); 2299 uint jop2 = jmpopcode(e22); 2300 2301 docommas(cdb,&e1); 2302 cgstate.stackclean++; 2303 2304 if (!OTrel(op1) && e1 == e21 && 2305 sz1 <= REGSIZE && !tyfloating(e1.Ety)) 2306 { // Recognize (e ? 
e : f) 2307 2308 code *cnop1 = gennop(null); 2309 regm_t retregs = *pretregs | mPSW; 2310 codelem(cdb,e1,&retregs,false); 2311 2312 cse_flush(cdb,1); // flush CSEs to memory 2313 genjmp(cdb,jop,FLcode,cast(block *)cnop1); 2314 freenode(e21); 2315 2316 regconsave = regcon; 2317 stackpushsave = stackpush; 2318 2319 retregs |= psw; 2320 if (retregs & (mBP | ALLREGS)) 2321 regimmed_set(findreg(retregs),0); 2322 codelem(cdb,e22,&retregs,false); 2323 2324 andregcon(®consave); 2325 assert(stackpushsave == stackpush); 2326 2327 *pretregs = retregs; 2328 freenode(e2); 2329 cdb.append(cnop1); 2330 cgstate.stackclean--; 2331 return; 2332 } 2333 2334 if (OTrel(op1) && sz1 <= REGSIZE && tysize(e2.Ety) <= REGSIZE && 2335 !e1.Ecount && 2336 (jop == JC || jop == JNC) && 2337 (sz2 = tysize(e2.Ety)) <= REGSIZE && 2338 e21.Eoper == OPconst && 2339 e22.Eoper == OPconst 2340 ) 2341 { 2342 uint sz = tysize(e.Ety); 2343 uint rex = (I64 && sz == 8) ? REX_W : 0; 2344 uint grex = rex << 16; 2345 2346 regm_t retregs; 2347 targ_size_t v1,v2; 2348 2349 if (sz2 != 1 || I64) 2350 { 2351 retregs = *pretregs & (ALLREGS | mBP); 2352 if (!retregs) 2353 retregs = ALLREGS; 2354 } 2355 else 2356 { 2357 retregs = *pretregs & BYTEREGS; 2358 if (!retregs) 2359 retregs = BYTEREGS; 2360 } 2361 2362 cdcmp_flag = 1 | rex; 2363 v1 = cast(targ_size_t)e21.EV.Vllong; 2364 v2 = cast(targ_size_t)e22.EV.Vllong; 2365 if (jop == JNC) 2366 { v1 = v2; 2367 v2 = cast(targ_size_t)e21.EV.Vllong; 2368 } 2369 2370 opcode_t opcode = 0x81; 2371 switch (sz2) 2372 { case 1: opcode--; 2373 v1 = cast(byte) v1; 2374 v2 = cast(byte) v2; 2375 break; 2376 2377 case 2: v1 = cast(short) v1; 2378 v2 = cast(short) v2; 2379 break; 2380 2381 case 4: v1 = cast(int) v1; 2382 v2 = cast(int) v2; 2383 break; 2384 default: 2385 break; 2386 } 2387 2388 if (I64 && v1 != cast(targ_ullong)cast(targ_ulong)v1) 2389 { 2390 // only zero-extension from 32-bits is available for 'or' 2391 } 2392 else if (I64 && cast(targ_llong)v2 != 
cast(targ_llong)cast(targ_long)v2) 2393 { 2394 // only sign-extension from 32-bits is available for 'and' 2395 } 2396 else 2397 { 2398 codelem(cdb,e1,&retregs,false); 2399 const reg = findreg(retregs); 2400 2401 if (v1 == 0 && v2 == ~cast(targ_size_t)0) 2402 { 2403 cdb.gen2(0xF6 + (opcode & 1),grex | modregrmx(3,2,reg)); // NOT reg 2404 if (I64 && sz2 == REGSIZE) 2405 code_orrex(cdb.last(), REX_W); 2406 } 2407 else 2408 { 2409 v1 -= v2; 2410 cdb.genc2(opcode,grex | modregrmx(3,4,reg),v1); // AND reg,v1-v2 2411 if (I64 && sz2 == 1 && reg >= 4) 2412 code_orrex(cdb.last(), REX); 2413 if (v2 == 1 && !I64) 2414 cdb.gen1(0x40 + reg); // INC reg 2415 else if (v2 == -1L && !I64) 2416 cdb.gen1(0x48 + reg); // DEC reg 2417 else 2418 { cdb.genc2(opcode,grex | modregrmx(3,0,reg),v2); // ADD reg,v2 2419 if (I64 && sz2 == 1 && reg >= 4) 2420 code_orrex(cdb.last(), REX); 2421 } 2422 } 2423 2424 freenode(e21); 2425 freenode(e22); 2426 freenode(e2); 2427 2428 fixresult(cdb,e,retregs,pretregs); 2429 cgstate.stackclean--; 2430 return; 2431 } 2432 } 2433 2434 if (op1 != OPcond && op1 != OPandand && op1 != OPoror && 2435 op1 != OPnot && op1 != OPbool && 2436 e21.Eoper == OPconst && 2437 sz1 <= REGSIZE && 2438 *pretregs & (mBP | ALLREGS) && 2439 tysize(e21.Ety) <= REGSIZE && !tyfloating(e21.Ety)) 2440 { // Recognize (e ? c : f) 2441 2442 code *cnop1 = gennop(null); 2443 regm_t retregs = mPSW; 2444 jop = jmpopcode(e1); // get jmp condition 2445 codelem(cdb,e1,&retregs,false); 2446 2447 // Set the register with e21 without affecting the flags 2448 retregs = *pretregs & (ALLREGS | mBP); 2449 if (retregs & ~regcon.mvar) 2450 retregs &= ~regcon.mvar; // don't disturb register variables 2451 // NOTE: see my email (sign extension bug? possible fix, some questions 2452 reg_t reg; 2453 regwithvalue(cdb,retregs,cast(targ_size_t)e21.EV.Vllong,®,tysize(e21.Ety) == 8 ? 
64|8 : 8); 2454 retregs = mask(reg); 2455 2456 cse_flush(cdb,1); // flush CSE's to memory 2457 genjmp(cdb,jop,FLcode,cast(block *)cnop1); 2458 freenode(e21); 2459 2460 regconsave = regcon; 2461 stackpushsave = stackpush; 2462 2463 codelem(cdb,e22,&retregs,false); 2464 2465 andregcon(®consave); 2466 assert(stackpushsave == stackpush); 2467 2468 freenode(e2); 2469 cdb.append(cnop1); 2470 fixresult(cdb,e,retregs,pretregs); 2471 cgstate.stackclean--; 2472 return; 2473 } 2474 2475 code *cnop1 = gennop(null); 2476 code *cnop2 = gennop(null); // dummy target addresses 2477 logexp(cdb,e1,false,FLcode,cnop1); // evaluate condition 2478 regconold = regcon; 2479 stackusedold = global87.stackused; 2480 stackpushold = stackpush; 2481 memcpy(_8087old.ptr,global87.stack.ptr,global87.stack.sizeof); 2482 regm_t retregs = *pretregs; 2483 CodeBuilder cdb1; 2484 cdb1.ctor(); 2485 if (psw && jop1 != JNE) 2486 { 2487 retregs &= ~mPSW; 2488 if (!retregs) 2489 retregs = ALLREGS; 2490 codelem(cdb1,e21,&retregs,false); 2491 fixresult(cdb1,e21,retregs,pretregs); 2492 } 2493 else 2494 codelem(cdb1,e21,&retregs,false); 2495 2496 if (CPP && e2.Eoper == OPcolon2) 2497 { 2498 code cs; 2499 2500 // This is necessary so that any cleanup code on one branch 2501 // is redone on the other branch. 
2502 cs.Iop = ESCAPE | ESCmark2; 2503 cs.Iflags = 0; 2504 cs.Irex = 0; 2505 cdb.gen(&cs); 2506 cdb.append(cdb1); 2507 cs.Iop = ESCAPE | ESCrelease2; 2508 cdb.gen(&cs); 2509 } 2510 else 2511 cdb.append(cdb1); 2512 2513 regconsave = regcon; 2514 regcon = regconold; 2515 2516 stackpushsave = stackpush; 2517 stackpush = stackpushold; 2518 2519 stackusedsave = global87.stackused; 2520 global87.stackused = stackusedold; 2521 2522 memcpy(_8087save.ptr,global87.stack.ptr,global87.stack.sizeof); 2523 memcpy(global87.stack.ptr,_8087old.ptr,global87.stack.sizeof); 2524 2525 retregs |= psw; // PSW bit may have been trashed 2526 CodeBuilder cdb2; 2527 cdb2.ctor(); 2528 if (psw && jop2 != JNE) 2529 { 2530 retregs &= ~mPSW; 2531 if (!retregs) 2532 retregs = ALLREGS; 2533 codelem(cdb2,e22,&retregs,false); 2534 fixresult(cdb2,e22,retregs,pretregs); 2535 } 2536 else 2537 codelem(cdb2,e22,&retregs,false); // use same regs as E1 2538 *pretregs = retregs | psw; 2539 andregcon(®conold); 2540 andregcon(®consave); 2541 assert(global87.stackused == stackusedsave); 2542 assert(stackpush == stackpushsave); 2543 memcpy(global87.stack.ptr,_8087save.ptr,global87.stack.sizeof); 2544 freenode(e2); 2545 genjmp(cdb,JMP,FLcode,cast(block *) cnop2); 2546 cdb.append(cnop1); 2547 cdb.append(cdb2); 2548 cdb.append(cnop2); 2549 if (*pretregs & mST0) 2550 note87(e,0,0); 2551 2552 cgstate.stackclean--; 2553 } 2554 2555 /********************* 2556 * Comma operator OPcomma 2557 */ 2558 2559 void cdcomma(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2560 { 2561 regm_t retregs = 0; 2562 codelem(cdb,e.EV.E1,&retregs,false); // ignore value from left leaf 2563 codelem(cdb,e.EV.E2,pretregs,false); // do right leaf 2564 } 2565 2566 2567 /********************************* 2568 * Do && and || operators. 
*      Generate:
 *              (evaluate e1 and e2, if true goto cnop1)
 *      cnop3:  NOP
 *      cg:     [save reg code]         ;if we must preserve reg
 *              CLR     reg             ;false result (set Z also)
 *              JMP     cnop2
 *
 *      cnop1:  NOP                     ;if e1 evaluates to true
 *              [save reg code]         ;preserve reg
 *
 *              MOV     reg,1           ;true result
 *                  or
 *              CLR     reg             ;if return result in flags
 *              INC     reg
 *
 *      cnop2:  NOP                     ;mark end of code
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the OPandand / OPoror elem; e.EV.E1 and e.EV.E2 are its operands
 *      pretregs = on entry, mask of acceptable result registers (mPSW asks
 *                 for the flags); 0 means the value is not wanted.
 *                 When a value is produced, set to the register(s) holding it.
 */

void cdloglog(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* We can trip the assert with the following:
     *    if ( (b<=a) ? (c<b || a<=c) : c>=a )
     * We'll generate ugly code for it, but it's too obscure a case
     * to expend much effort on it.
     * assert(*pretregs != mPSW);
     */

    cgstate.stackclean++;
    code *cnop1 = gennop(null);
    CodeBuilder cdb1;                   // code placed at label cnop1
    cdb1.ctor();
    cdb1.append(cnop1);
    code *cnop3 = gennop(null);
    elem *e2 = e.EV.E2;
    // ||: a true e1 jumps to cnop1.  &&: a false e1 jumps to cnop3.
    (e.Eoper == OPoror)
        ? logexp(cdb,e.EV.E1,1,FLcode,cnop1)
        : logexp(cdb,e.EV.E1,0,FLcode,cnop3);
    con_t regconsave = regcon;          // register state on the path that skips e2
    uint stackpushsave = stackpush;
    if (*pretregs == 0)                 // if don't want result
    {
        int noreturn = !el_returns(e2);
        codelem(cdb,e2,pretregs,false);
        if (noreturn)
        {
            // e2 never falls through, so only the saved state reaches the join;
            // still record the registers e2 used
            regconsave.used |= regcon.used;
            regcon = regconsave;
        }
        else
            andregcon(&regconsave);
        assert(stackpush == stackpushsave);
        cdb.append(cnop3);
        cdb.append(cdb1);        // eval code, throw away result
        cgstate.stackclean--;
        return;
    }
    code *cnop2 = gennop(null);
    uint sz = tysize(e.Ety);
    // Special case: e2 is a call returning a bool of the result's size and the
    // flags are not wanted, so e2's value is used directly as the result.
    if (tybasic(e2.Ety) == TYbool &&
      sz == tysize(e2.Ety) &&
      !(*pretregs & mPSW) &&
      e2.Eoper == OPcall)
    {
        codelem(cdb,e2,pretregs,false);

        andregcon(&regconsave);

        // stack depth should not change when evaluating E2
        assert(stackpush == stackpushsave);

        assert(sz <= 4);                                        // result better be int
        regm_t retregs = *pretregs & allregs;
        reg_t reg;
        allocreg(cdb1,&retregs,&reg,TYint);                     // allocate reg for result
        movregconst(cdb1,reg,e.Eoper == OPoror,0);              // reg = 1 for ||, 0 for &&
        regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
        *pretregs = retregs;
        if (e.Eoper == OPoror)
        {
            cdb.append(cnop3);
            genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
            cdb.append(cdb1);
            cdb.append(cnop2);
        }
        else
        {
            genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
            cdb.append(cnop3);
            cdb.append(cdb1);
            cdb.append(cnop2);
        }
        cgstate.stackclean--;
        return;
    }
    logexp(cdb,e2,1,FLcode,cnop1);      // a true e2 also goes to cnop1
    andregcon(&regconsave);

    // stack depth should not change when evaluating E2
    assert(stackpush == stackpushsave);

    assert(sz <= 4);                                         // result better be int
    regm_t retregs = *pretregs & (ALLREGS | mBP);
    if (!retregs)
        retregs = ALLREGS;                                   // if mPSW only
    CodeBuilder cdbcg;
    cdbcg.ctor();
    reg_t reg;
    allocreg(cdbcg,&retregs,&reg,TYint);                     // allocate reg for result
    code *cg = cdbcg.finish();
    for (code *c1 = cg; c1; c1 = code_next(c1))              // for each instruction
        cdb1.gen(c1);                                        // duplicate it
    CodeBuilder cdbcg2;
    cdbcg2.ctor();
    movregconst(cdbcg2,reg,0,*pretregs & mPSW);              // MOV reg,0
    regcon.immed.mval &= ~mask(reg);                         // mark reg as unavail
    genjmp(cdbcg2, JMP,FLcode,cast(block *) cnop2);          // JMP cnop2
    movregconst(cdb1,reg,1,*pretregs & mPSW);                // reg = 1
    regcon.immed.mval &= ~mask(reg);                         // mark reg as unavail
    *pretregs = retregs;
    // Final layout: cnop3, false path (cg, reg=0, JMP cnop2), cnop1, true path, cnop2
    cdb.append(cnop3);
    cdb.append(cg);
    cdb.append(cdbcg2);
    cdb.append(cdb1);
    cdb.append(cnop2);
    cgstate.stackclean--;
    return;
}


/*********************
 * Generate code for shift left or shift right (OPshl,OPshr,OPashr,OProl,OPror).
*
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = shift/rotate elem; e.EV.E1 is the value operated on,
 *          e.EV.E2 is the count
 *      pretregs = on entry, mask of acceptable result registers (mPSW asks
 *                 for the flags); 0 means the result is not wanted, in which
 *                 case both operands are still evaluated
 */

void cdshift(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    reg_t resreg;
    uint shiftcnt;
    regm_t retregs,rretregs;

    //printf("cdshift()\n");
    elem *e1 = e.EV.E1;
    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false); // eval left leaf
        *pretregs = 0;                  // in case they got set
        codelem(cdb,e.EV.E2,pretregs,false);
        return;
    }

    tym_t tyml = tybasic(e1.Ety);
    int sz = _tysize[tyml];
    assert(!tyfloating(tyml));
    OPER oper = e.Eoper;
    // REX_W, positioned to be or'ed into a modregrm operand, for 8 byte operands in 64 bit mode
    uint grex = ((I64 && sz == 8) ? REX_W : 0) << 16;

version (SCPP)
{
    // Do this until the rest of the compiler does OPshr/OPashr correctly
    if (oper == OPshr)
        oper = (tyuns(tyml)) ? OPshr : OPashr;
}

    // s1: reg field selecting the shift/rotate instruction.
    // s2: reg field of the rotate-through-carry used on the other half of a
    //     two-register operand (not assigned for OProl/OPror).
    uint s1,s2;
    switch (oper)
    {
        case OPshl:
            s1 = 4;                     // SHL
            s2 = 2;                     // RCL
            break;
        case OPshr:
            s1 = 5;                     // SHR
            s2 = 3;                     // RCR
            break;
        case OPashr:
            s1 = 7;                     // SAR
            s2 = 3;                     // RCR
            break;
        case OProl:
            s1 = 0;                     // ROL
            break;
        case OPror:
            s1 = 1;                     // ROR
            break;
        default:
            assert(0);
    }

    reg_t sreg = NOREG;                   // guard against using value without assigning to sreg
    elem *e2 = e.EV.E2;
    regm_t forccs = *pretregs & mPSW;            // if return result in CCs
    regm_t forregs = *pretregs & (ALLREGS | mBP); // mask of possible return regs
    bool e2isconst = false;                    // assume for the moment
    uint isbyte = (sz == 1);
    // Constant counts are handled inline where one of the conditions below
    // holds; everything else falls to the default case and shifts by CL.
    switch (e2.Eoper)
    {
        case OPconst:
            e2isconst = true;               // e2 is a constant
            shiftcnt = e2.EV.Vint;         // get shift count
            if ((!I16 && sz <= REGSIZE) ||
                shiftcnt <= 4 ||            // if sequence of shifts
                (sz == 2 &&
                    (shiftcnt == 8 || config.target_cpu >= TARGET_80286)) ||
                (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
               )
            {
                retregs = (forregs) ? forregs
                                    : ALLREGS;
                if (isbyte)
                {   retregs &= BYTEREGS;
                    if (!retregs)
                        retregs = BYTEREGS;
                }
                else if (sz > REGSIZE && sz <= 2 * REGSIZE &&
                         !(retregs & mMSW))
                    retregs |= mMSW & ALLREGS;
                if (s1 == 7)    // if arithmetic right shift
                {
                    // the CBW / CWD sequences used below work on AX and DX:AX
                    if (shiftcnt == 8)
                        retregs = mAX;
                    else if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
                        retregs = mDX|mAX;
                }

                if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE &&
                    oper == OPshl &&
                    !e1.Ecount &&
                    (e1.Eoper == OPs16_32 || e1.Eoper == OPu16_32 ||
                     e1.Eoper == OPs32_64 || e1.Eoper == OPu32_64)
                   )
                {   // Handle (shtlng)s << 16
                    // Evaluate the unwidened operand straight into the msw, zero the lsw
                    regm_t r = retregs & mMSW;
                    codelem(cdb,e1.EV.E1,&r,false);      // eval left leaf
                    regwithvalue(cdb,retregs & mLSW,0,&resreg,0);
                    getregs(cdb,r);
                    retregs = r | mask(resreg);
                    if (forccs)
                    {   sreg = findreg(r);
                        gentstreg(cdb,sreg);
                        *pretregs &= ~mPSW;             // already set
                    }
                    freenode(e1);
                    freenode(e2);
                    break;
                }

                // See if we should use LEA reg,xxx instead of shift
                if (!I16 && shiftcnt >= 1 && shiftcnt <= 3 &&
                    (sz == REGSIZE || (I64 && sz == 4)) &&
                    oper == OPshl &&
                    e1.Eoper == OPvar &&
                    !(*pretregs & mPSW) &&
                    config.flags4 & CFG4speed
                   )
                {
                    reg_t reg;
                    regm_t regm;

                    if (isregvar(e1,&regm,&reg) && !(regm & retregs))
                    {   code cs;
                        allocreg(cdb,&retregs,&resreg,e.Ety);
                        buildEA(&cs,-1,reg,1 << shiftcnt,0);
                        cs.Iop = LEA;
                        code_newreg(&cs,resreg);
                        cs.Iflags = 0;
                        if (I64 && sz == 8)
                            cs.Irex |= REX_W;
                        cdb.gen(&cs);             // LEA resreg,[reg * ss]
                        freenode(e1);
                        freenode(e2);
                        break;
                    }
                }

                codelem(cdb,e1,&retregs,false); // eval left leaf
                //assert((retregs & regcon.mvar) == 0);
                getregs(cdb,retregs);  // modify these regs

                {
                    if (sz == 2 * REGSIZE)
                    {   resreg = findregmsw(retregs);
                        sreg = findreglsw(retregs);
                    }
                    else
                    {   resreg = findreg(retregs);
                        sreg = NOREG;              // an invalid value
                    }
                    if (config.target_cpu >= TARGET_80286 &&
                        sz <= REGSIZE)
                    {
                        // SHL resreg,shiftcnt
                        assert(!(sz == 1 && (mask(resreg) & ~BYTEREGS)));
                        cdb.genc2(0xC1 ^ isbyte,grex | modregxrmx(3,s1,resreg),shiftcnt);
                        if (shiftcnt == 1)
                            cdb.last().Iop += 0x10;     // short form of shift
                        if (I64 && sz == 1 && resreg >= 4)
                            cdb.last().Irex |= REX;
                        // See if we need operand size prefix
                        if (!I16 && oper != OPshl && sz == 2)
                            cdb.last().Iflags |= CFopsize;
                        if (forccs)
                            cdb.last().Iflags |= CFpsw;         // need flags result
                    }
                    else if (shiftcnt == 8)
                    {   if (!(retregs & BYTEREGS) || resreg >= 4)
                        {
                            // no high/low byte halves to move between; shift by CL instead
                            goto L1;
                        }

                        if (pass != PASSfinal && (!forregs || forregs & (mSI | mDI)))
                        {
                            // e1 might get into SI or DI in a later pass,
                            // so don't put CX into a register
                            getregs(cdb,mCX);
                        }

                        assert(sz == 2);
                        switch (oper)
                        {
                            case OPshl:
                                // MOV regH,regL        XOR regL,regL
                                assert(resreg < 4 && !grex);
                                genregs(cdb,0x8A,resreg+4,resreg);
                                genregs(cdb,0x32,resreg,resreg);
                                break;

                            case OPshr:
                            case OPashr:
                                // MOV regL,regH
                                genregs(cdb,0x8A,resreg,resreg+4);
                                if (oper == OPashr)
                                    cdb.gen1(0x98);           // CBW
                                else
                                    genregs(cdb,0x32,resreg+4,resreg+4); // CLR regH
                                break;

                            case OPror:
                            case OProl:
                                // XCHG regL,regH
                                genregs(cdb,0x86,resreg+4,resreg);
                                break;

                            default:
                                assert(0);
                        }
                        if (forccs)
                            gentstreg(cdb,resreg);
                    }
                    else if (shiftcnt == REGSIZE * 8)   // it's an lword
                    {
                        // Shifting by a whole register: move one half into the
                        // other and fill the vacated half
                        if (oper == OPshl)
                            swap(&resreg, &sreg);
                        genmovreg(cdb,sreg,resreg);  // MOV sreg,resreg
                        if (oper == OPashr)
                            cdb.gen1(0x99);                       // CWD
                        else
                            movregconst(cdb,resreg,0,0);  // MOV resreg,0
                        if (forccs)
                        {
                            gentstreg(cdb,sreg);
                            *pretregs &= mBP | ALLREGS | mES;
                        }
                    }
                    else
                    {
                        // Emit shiftcnt single-bit shifts; for a register pair the
                        // second half is rotated through the carry
                        if (oper == OPshl && sz == 2 * REGSIZE)
                            swap(&resreg, &sreg);
                        while (shiftcnt--)
                        {
                            cdb.gen2(0xD1 ^ isbyte,modregrm(3,s1,resreg));
                            if (sz == 2 * REGSIZE)
                            {
                                code_orflag(cdb.last(),CFpsw);
                                cdb.gen2(0xD1,modregrm(3,s2,sreg));
                            }
                        }
                        if (forccs)
                            code_orflag(cdb.last(),CFpsw);
                    }
                    if (sz <= REGSIZE)
                        *pretregs &= mBP | ALLREGS;     // flags already set
                }
                freenode(e2);
                break;
            }
            goto default;

        default:
            retregs = forregs & ~mCX;               // CX will be shift count
            if (sz <= REGSIZE)
            {
                if (forregs & ~regcon.mvar && !(retregs & ~regcon.mvar))
                    retregs = ALLREGS & ~mCX;       // need something
                else if (!retregs)
                    retregs = ALLREGS & ~mCX;       // need something
                if (sz == 1)
                {   retregs &= mAX|mBX|mDX;
                    if (!retregs)
                        retregs = mAX|mBX|mDX;
                }
            }
            else
            {
                if (!(retregs & mMSW))
                    retregs = ALLREGS & ~mCX;
            }
            codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf

            if (sz <= REGSIZE)
                resreg = findreg(retregs);
            else
            {
                resreg = findregmsw(retregs);
                sreg = findreglsw(retregs);
            }
        L1:
            rretregs = mCX;                 // CX is shift count
            if (sz <= REGSIZE)
            {
                scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue
                getregs(cdb,retregs);      // trash these regs
                cdb.gen2(0xD3 ^ isbyte,grex | modregrmx(3,s1,resreg)); // Sxx resreg,CX

                if (!I16 && sz == 2 && (oper == OProl || oper == OPror))
                    cdb.last().Iflags |= CFopsize;

                // Note that a shift by CL does not set the flags if
                // CL == 0. If e2 is a constant, we know it isn't 0
                // (it would have been optimized out).
                if (e2isconst)
                    *pretregs &= mBP | ALLREGS; // flags already set with result
            }
            else if (sz == 2 * REGSIZE &&
                     config.target_cpu >= TARGET_80386)
            {
                reg_t hreg = resreg;
                reg_t lreg = sreg;
                uint rex = I64 ? (REX_W << 16) : 0;
                if (e2isconst)
                {
                    getregs(cdb,retregs);
                    if (shiftcnt & (REGSIZE * 8))
                    {
                        // count is at least one full register wide
                        if (oper == OPshr)
                        {   //      SHR hreg,shiftcnt
                            //      MOV lreg,hreg
                            //      XOR hreg,hreg
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt - (REGSIZE * 8));
                            genmovreg(cdb,lreg,hreg);
                            movregconst(cdb,hreg,0,0);
                        }
                        else if (oper == OPashr)
                        {   //      MOV     lreg,hreg
                            //      SAR     hreg,31
                            //      SHRD    lreg,hreg,shiftcnt
                            genmovreg(cdb,lreg,hreg);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),(REGSIZE * 8) - 1);
                            cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt - (REGSIZE * 8));
                        }
                        else
                        {   //      SHL lreg,shiftcnt
                            //      MOV hreg,lreg
                            //      XOR lreg,lreg
                            cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt - (REGSIZE * 8));
                            genmovreg(cdb,hreg,lreg);
                            movregconst(cdb,lreg,0,0);
                        }
                    }
                    else
                    {
                        if (oper == OPshr || oper == OPashr)
                        {   //      SHRD    lreg,hreg,shiftcnt
                            //      SHR/SAR hreg,shiftcnt
                            cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt);
                        }
                        else
                        {   //      SHLD hreg,lreg,shiftcnt
                            //      SHL  lreg,shiftcnt
                            cdb.genc2(0x0FA4,rex | modregrm(3,lreg,hreg),shiftcnt);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt);
                        }
                    }
                    freenode(e2);
                }
                else if (config.target_cpu >= TARGET_80486 && REGSIZE == 2)
                {
                    scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
                    getregs(cdb,retregs);          // modify these regs
                    if (oper == OPshl)
                    {
                        /*
                            SHLD    hreg,lreg,CL
                            SHL     lreg,CL
                         */

                        cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
                        cdb.gen2(0xD3,modregrm(3,4,lreg));
                    }
                    else
                    {
                        /*
                            SHRD    lreg,hreg,CL
                            SAR             hreg,CL

                            -- or --

                            SHRD    lreg,hreg,CL
                            SHR             hreg,CL
                         */
                        cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        cdb.gen2(0xD3,modregrm(3,s1,hreg));
                    }
                }
                else
                {   code* cl1,cl2;

                    // Variable count: test the "whole register" bit of CL at run
                    // time and pick one of two sequences.  cdb1 collects the
                    // code for label cl1 (bit set); cdb gets the other path.
                    scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
                    getregs(cdb,retregs | mCX);     // modify these regs
                                                            // TEST CL,0x20
                    cdb.genc2(0xF6,modregrm(3,0,CX),REGSIZE * 8);
                    cl1 = gennop(null);
                    CodeBuilder cdb1;
                    cdb1.ctor();
                    cdb1.append(cl1);
                    if (oper == OPshl)
                    {
                        /*  TEST    CL,20H
                            JNE     L1
                            SHLD    hreg,lreg,CL
                            SHL     lreg,CL
                            JMP     L2
                        L1: AND     CL,20H-1
                            SHL     lreg,CL
                            MOV     hreg,lreg
                            XOR     lreg,lreg
                        L2: NOP
                         */

                        if (REGSIZE == 2)
                            cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                        cdb1.gen2(0xD3,modregrm(3,4,lreg));
                        genmovreg(cdb1,hreg,lreg);
                        genregs(cdb1,0x31,lreg,lreg);

                        genjmp(cdb,JNE,FLcode,cast(block *)cl1);
                        cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
                        cdb.gen2(0xD3,modregrm(3,4,lreg));
                    }
                    else
                    {   if (oper == OPashr)
                        {
                            /*  TEST        CL,20H
                                JNE         L1
                                SHRD        lreg,hreg,CL
                                SAR         hreg,CL
                                JMP         L2
                            L1: AND         CL,15
                                MOV         lreg,hreg
                                SAR         hreg,31
                                SHRD        lreg,hreg,CL
                            L2: NOP
                             */

                            if (REGSIZE == 2)
                                cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                            genmovreg(cdb1,lreg,hreg);
                            cdb1.genc2(0xC1,modregrm(3,s1,hreg),31);
                            cdb1.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        }
                        else
                        {
                            /*  TEST        CL,20H
                                JNE         L1
                                SHRD        lreg,hreg,CL
                                SHR         hreg,CL
                                JMP         L2
                            L1: AND         CL,15
                                SHR         hreg,CL
                                MOV         lreg,hreg
                                XOR         hreg,hreg
                            L2: NOP
                             */

                            if (REGSIZE == 2)
                                cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                            cdb1.gen2(0xD3,modregrm(3,5,hreg));
                            genmovreg(cdb1,lreg,hreg);
                            genregs(cdb1,0x31,hreg,hreg);
                        }
                        genjmp(cdb,JNE,FLcode,cast(block *)cl1);
                        cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        cdb.gen2(0xD3,modregrm(3,s1,hreg));
                    }
                    cl2 = gennop(null);
                    genjmp(cdb,JMPS,FLcode,cast(block *)cl2);
                    cdb.append(cdb1);
                    cdb.append(cl2);
                }
                break;
            }
            else if (sz == 2 * REGSIZE)
            {
                // Loop of single-bit shifts on the register pair, counted in CX
                scodelem(cdb,e2,&rretregs,retregs,false);
                getregs(cdb,retregs | mCX);
                if (oper == OPshl)
                    swap(&resreg, &sreg);
                if (!e2isconst)                   // if not sure shift count != 0
                    cdb.genc2(0xE3,0,6);          // JCXZ .+6
                cdb.gen2(0xD1,modregrm(3,s1,resreg));
                code_orflag(cdb.last(),CFtarg2);
                cdb.gen2(0xD1,modregrm(3,s2,sreg));
                cdb.genc2(0xE2,0,cast(targ_uns)-6);          // LOOP .-6
                regimmed_set(CX,0);         // note that now CX == 0
            }
            else
                assert(0);
            break;
    }
    fixresult(cdb,e,retregs,pretregs);
}


/***************************
 * Perform a 'star' reference (indirection).
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the indirection elem; e.EV.E1 is the address expression
 *      pretregs = on entry, mask of acceptable result locations (registers,
 *                 mPSW for flags only, mSTACK, mST0); 0 means the value is
 *                 not wanted, in which case a load is generated only for a
 *                 volatile type
 */

void cdind(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;

    //printf("cdind(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    tym_t tym = tybasic(e.Ety);
    // With inline 8087 code enabled, floating point loads that want ST0 or
    // only the flags, and complex loads, are handed to the x87 routines
    if (tyfloating(tym))
    {
        if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
            if (I64 && tym == TYcfloat && *pretregs & (ALLREGS | mBP))
            { }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }

            if (*pretregs & mPSW)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
        }
    }

    elem *e1 = e.EV.E1;
    assert(e1);
    switch (tym)
    {
        case TYstruct:
        case TYarray:
            // This case should never happen, why is it here?
            tym = TYnptr;               // don't confuse allocreg()
            if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
                    tym = TYfptr;
            break;

        default:
            break;
    }
    uint sz = _tysize[tym];
    uint isbyte = tybyte(tym) != 0;

    code cs;

     getlvalue(cdb,&cs,e,RMload);          // get addressing mode
    //printf("Irex = %02x, Irm = x%02x, Isib = x%02x\n", cs.Irex, cs.Irm, cs.Isib);
    //fprintf(stderr,"cd2 :\n"); WRcodlst(c);
    if (*pretregs == 0)
    {
        if (e.Ety & mTYvolatile)               // do the load anyway
            *pretregs = regmask(e.Ety, 0);     // load into registers
        else
            return;
    }

    regm_t idxregs = idxregm(&cs);               // mask of index regs used

    if (*pretregs == mPSW)
    {
        // Only the flags are wanted: test the memory operand against zero
        // without keeping the loaded value
        if (!I16 && tym == TYfloat)
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYfloat);
            cs.Iop = 0x8B;
            code_newreg(&cs,reg);
            cdb.gen(&cs);                       // MOV reg,lsw
            cdb.gen2(0xD1,modregrmx(3,4,reg));  // SHL reg,1
            code_orflag(cdb.last(), CFpsw);
        }
        else if (sz <= REGSIZE)
        {
            cs.Iop = 0x81 ^ isbyte;
            cs.Irm |= modregrm(0,7,0);
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = 0;
            cdb.gen(&cs);             // CMP [idx],0
        }
        else if (!I16 && sz == REGSIZE + 2)      // if far pointer
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYint);
            cs.Iop = MOVZXw;
            cs.Irm |= modregrm(0,reg,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);             // MOVZX reg,msw
            goto L4;
        }
        else if (sz <= 2 * REGSIZE)
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYint);
            cs.Iop = 0x8B;
            code_newreg(&cs,reg);
            getlvalue_msw(&cs);
            cdb.gen(&cs);             // MOV reg,msw
            if (I32)
            {   if (tym == TYdouble || tym == TYdouble_alias)
                    cdb.gen2(0xD1,modregrm(3,4,reg)); // SHL reg,1
            }
            else if (tym == TYfloat)
                cdb.gen2(0xD1,modregrm(3,4,reg));     // SHL reg,1
        L4:
            cs.Iop = 0x0B;
            getlvalue_lsw(&cs);
            cs.Iflags |= CFpsw;
            cdb.gen(&cs);                    // OR reg,lsw
        }
        else if (!I32 && sz == 8)
        {
            *pretregs |= DOUBLEREGS_16;     // fake it for now
            goto L1;
        }
        else
        {
            debug WRTYxx(tym);
            assert(0);
        }
    }
    else                                // else return result in reg
    {
    L1:
        retregs = *pretregs;
        if (sz == 8 &&
            (retregs & (mPSW | mSTACK | ALLREGS | mBP)) == mSTACK)
        {   int i;

            // Result wanted on the stack only: push it straight from memory,
            // highest REGSIZE piece first

            // Optimizer should not CSE these, as the result is worse code!
            assert(!e.Ecount);

            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += 8 - REGSIZE;
            stackchanged = 1;
            i = 8 - REGSIZE;
            do
            {
                cdb.gen(&cs);                         // PUSH EA+i
                cdb.genadjesp(REGSIZE);
                cs.IEV1.Voffset -= REGSIZE;
                stackpush += REGSIZE;
                i -= REGSIZE;
            }
            while (i >= 0);
            goto L3;
        }
        if (I16 && sz == 8)
            retregs = DOUBLEREGS_16;

        // Watch out for loading an lptr from an lptr! We must have
        // the offset loaded into a different register.
        /*if (retregs & mES && (cs.Iflags & CFSEG) == CFes)
                retregs = ALLREGS;*/

        {
            assert(!isbyte || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,tym); // alloc registers
        }
        if (retregs & XMMREGS)
        {
            assert(sz == 4 || sz == 8 || sz == 16 || sz == 32); // float, double or vector
            cs.Iop = xmmload(tym);
            cs.Irex &= ~REX_W;
            code_newreg(&cs,reg - XMM0);
            checkSetVex(&cs,tym);
            cdb.gen(&cs);     // MOV reg,[idx]
        }
        else if (sz <= REGSIZE)
        {
            cs.Iop = 0x8B;                                  // MOV
            if (sz <= 2 && !I16 &&
                config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed)
            {
                cs.Iop = tyuns(tym) ? MOVZXw : MOVSXw;      // MOVZX/MOVSX
                cs.Iflags &= ~CFopsize;
            }
            cs.Iop ^= isbyte;
        L2:
            code_newreg(&cs,reg);
            cdb.gen(&cs);     // MOV reg,[idx]
            if (isbyte && reg >= 4)
                code_orrex(cdb.last(), REX);
        }
        else if ((tym == TYfptr || tym == TYhptr) && retregs & mES)
        {
            cs.Iop = 0xC4;            // LES reg,[idx]
            goto L2;
        }
        else if (sz <= 2 * REGSIZE)
        {   uint lsreg;

            // Two-register load; the order of the two MOVs is chosen so an
            // index register is not overwritten before its last use
            cs.Iop = 0x8B;
            // Be careful not to interfere with index registers
            if (!I16)
            {
                // Can't handle if both result registers are used in
                // the addressing mode.
                if ((retregs & idxregs) == retregs)
                {
                    retregs = mMSW & allregs & ~idxregs;
                    if (!retregs)
                        retregs |= mCX;
                    retregs |= mLSW & ~idxregs;

                    // We can run out of registers, so if that's possible,
                    // give us *one* of the idxregs
                    if ((retregs & ~regcon.mvar & mLSW) == 0)
                    {
                        regm_t x = idxregs & mLSW;
                        if (x)
                            retregs |= mask(findreg(x));        // give us one idxreg
                    }
                    else if ((retregs & ~regcon.mvar & mMSW) == 0)
                    {
                        regm_t x = idxregs & mMSW;
                        if (x)
                            retregs |= mask(findreg(x));        // give us one idxreg
                    }

                    allocreg(cdb,&retregs,&reg,tym);     // alloc registers
                    assert((retregs & idxregs) != retregs);
                }

                lsreg = findreglsw(retregs);
                if (mask(reg) & idxregs)                // reg is in addr mode
                {
                    code_newreg(&cs,lsreg);
                    cdb.gen(&cs);                 // MOV lsreg,lsw
                    if (sz == REGSIZE + 2)
                        cs.Iflags |= CFopsize;
                    lsreg = reg;
                    getlvalue_msw(&cs);                 // MOV reg,msw
                }
                else
                {
                    code_newreg(&cs,reg);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);                 // MOV reg,msw
                    if (sz == REGSIZE + 2)
                        cdb.last().Iflags |= CFopsize;
                    getlvalue_lsw(&cs);                 // MOV lsreg,lsw
                }
                NEWREG(cs.Irm,lsreg);
                cdb.gen(&cs);
            }
            else
            {
                // Index registers are always the lsw!
                cs.Irm |= modregrm(0,reg,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);     // MOV reg,msw
                lsreg = findreglsw(retregs);
                NEWREG(cs.Irm,lsreg);
                getlvalue_lsw(&cs);                 // MOV lsreg,lsw
                cdb.gen(&cs);
            }
        }
        else if (I16 && sz == 8)
        {
            assert(reg == AX);
            cs.Iop = 0x8B;
            cs.IEV1.Voffset += 6;
            cdb.gen(&cs);             // MOV AX,EA+6
            cs.Irm |= modregrm(0,CX,0);
            cs.IEV1.Voffset -= 4;
            cdb.gen(&cs);                    // MOV CX,EA+2
            NEWREG(cs.Irm,DX);
            cs.IEV1.Voffset -= 2;
            cdb.gen(&cs);                    // MOV DX,EA
            cs.IEV1.Voffset += 4;
            NEWREG(cs.Irm,BX);
            cdb.gen(&cs);                    // MOV BX,EA+4
        }
        else
            assert(0);
    L3:
        fixresult(cdb,e,retregs,pretregs);
    }
    //fprintf(stderr,"cdafter :\n"); WRcodlst(c);
}



static if (!TARGET_SEGMENTED)
{
// Targets without segments never need ES loaded
private code *cod2_setES(tym_t ty) { return null; }
}
else
{
/********************************
 * Generate code to load ES with the right segment value for a pointer
 * of type ty. No instructions are generated for pointer types not listed
 * in the switch (far pointers among them), nor when the configuration
 * flags tested below make a reload unnecessary.
 * Params:
 *      ty = type of the pointer about to be used through ES
 * Returns:
 *      the generated code, as returned by CodeBuilder.finish()
 */

private code *cod2_setES(tym_t ty)
{
    int push;                           // opcode of the PUSH segreg to emit

    CodeBuilder cdb;
    cdb.ctor();
    switch (tybasic(ty))
    {
        case TYnptr:
            if (!(config.flags3 & CFG3eseqds))
            {   push = 0x1E;                    // PUSH DS
                goto L1;
            }
            break;
        case TYcptr:
            push = 0x0E;                        // PUSH CS
            goto L1;
        case TYsptr:
            if ((config.wflags & WFssneds) || !(config.flags3 & CFG3eseqds))
            {   push = 0x16;                    // PUSH SS
            L1:
                // Must load ES
                getregs(cdb,mES);
                cdb.gen1(push);
                cdb.gen1(0x07);                 // POP ES
            }
            break;

        default:
            break;
    }
    return cdb.finish();
}
}

/********************************
 * Generate code for intrinsic strlen().
*
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the strlen elem; e.EV.E1 is the string pointer
 *      pretregs = mask of acceptable result locations; the length is
 *                 produced in CX and handed to fixresult()
 */

void cdstrlen(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Instruction sequence emitted (length ends up in CX):
        LES     DI,e1
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX
        DEC     CX
     */

    // The pointer goes into DI; ES is requested as well when tyreg()
    // rejects the pointer's type
    tym_t strty = e.EV.E1.Ety;
    regm_t ptrregs = tyreg(strty) ? mDI : (mDI | mES);
    codelem(cdb,e.EV.E1,&ptrregs,false);

    // ES must hold the segment SCASB scans through
    cdb.append(cod2_setES(strty));

    ubyte rexw = I64 ? REX_W : 0;

    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                                // MOV AL,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mDI|mCX);
    cdb.gen1(0xF2);                                         // REPNE
    cdb.gen1(0xAE);                                         // SCASB
    genregs(cdb,0xF7,2,CX);                                 // NOT CX
    code_orrex(cdb.last(), rexw);
    if (!I64)
        cdb.gen1(0x48 + CX);                                // DEC CX
    else
        cdb.gen2(0xFF,(rexw << 16) | modregrm(3,1,CX));     // DEC reg

    // When the caller also wants the flags, the final DEC supplies them
    const bool flagsWanted = (*pretregs & mPSW) != 0;
    if (flagsWanted)
    {
        cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;
    }
    fixresult(cdb,e,mCX,pretregs);
}


/*********************************
 * Generate code for strcmp(s1,s2) intrinsic.
*
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the strcmp elem; e.EV.E1 is s1, e.EV.E2 is s2
 *      pretregs = mask of acceptable result locations; if it is exactly
 *                 mPSW only the flags of the compare are produced,
 *                 otherwise the value is produced in AX
 */

void cdstrcmp(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Instruction sequence emitted:
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     CX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        PUSH    DS
        MOV     DS,CX
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX                      ;CX = string length of s2
        SUB     DI,CX                   ;point DI back to beginning
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
     */

    // s1 goes into SI, with CX added when tyreg() rejects its type
    tym_t ty1 = e.EV.E1.Ety;
    regm_t s1regs = tyreg(ty1) ? mSI : (mSI | mCX);
    codelem(cdb,e.EV.E1,&s1regs,false);

    // s2 goes into DI, with ES added when tyreg() rejects its type
    tym_t ty2 = e.EV.E2.Ety;
    regm_t s2regs = tyreg(ty2) ? mDI : (mDI | mES);
    scodelem(cdb,e.EV.E2,&s2regs,s1regs,false);

    // ES must hold the segment of s2
    cdb.append(cod2_setES(ty2));
    getregs_imm(cdb,mAX | mCX);

    ubyte rexw = I64 ? REX_W : 0;

    // Work out how DS has to be set up for s1
    bool swapDS = true;         // DS is saved, reloaded, and restored afterwards
    bool segInCX = false;       // new DS value is taken from CX
    int srcseg;                 // otherwise: segment register copied into DS
    switch (tybasic(ty1))
    {
        case TYnptr:
        case TYimmutPtr:
            swapDS = false;
            break;

        case TYsptr:
            // if sptr can't use DS segment, take SS
            srcseg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            break;

        case TYcptr:
            srcseg = SEG_CS;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            segInCX = true;
            break;

        default:
            assert(0);
    }
    if (swapDS)
    {
        cdb.gen1(0x1E);                                 // PUSH DS
        if (segInCX)
            cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));       // MOV DS,CX
        else
        {
            cdb.gen1(0x06 + (srcseg << 3));             // PUSH segreg
            cdb.gen1(0x1F);                             // POP DS
        }
    }

    movregconst(cdb,AX,0,0);                                // MOV AX,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mSI|mDI|mCX);
    cdb.gen1(0xF2);                                 // REPNE
    cdb.gen1(0xAE);                                 // SCASB
    genregs(cdb,0xF7,2,CX);                         // NOT CX
    code_orrex(cdb.last(),rexw);
    genregs(cdb,0x2B,DI,CX);                        // SUB DI,CX
    code_orrex(cdb.last(),rexw);
    cdb.gen1(0xF3);                                 // REPE
    cdb.gen1(0xA6);                                 // CMPSB
    if (swapDS)
        cdb.gen1(0x1F);                             // POP DS

    code *done = gennop(null);                      // label L1 of the sequence above
    if (*pretregs != mPSW)                          // if not flags only
    {
        genjmp(cdb,JE,FLcode,cast(block *) done);   // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                    // SBB AX,AX
        code_orrex(cdb.last(),rexw);
        cdb.genc2(0x81,(rexw << 16) | modregrm(3,3,AX),cast(targ_uns)-1);   // SBB AX,-1
    }

    *pretregs &= ~mPSW;
    cdb.append(done);
    fixresult(cdb,e,mAX,pretregs);
}

/*********************************
 * Generate code for memcmp(s1,s2,n) intrinsic.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the memcmp elem; e.EV.E1 is an OPparam holding s1 and s2,
 *          e.EV.E2 is the byte count
 *      pretregs = mask of acceptable result locations; if it is exactly
 *                 mPSW only the flags of the compare are produced,
 *                 otherwise the value is produced in AX
 */

void cdmemcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Instruction sequence emitted:
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     DX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        MOV     CX,n                    ;get number of bytes to compare
        PUSH    DS
        MOV     DS,DX
        XOR     AX,AX
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
     */

    elem *params = e.EV.E1;
    assert(params.Eoper == OPparam);

    // Get s1 into DX:SI
    tym_t ty1 = params.EV.E1.Ety;
    regm_t s1regs = tyreg(ty1) ? mSI : (mSI | mDX);
    codelem(cdb,params.EV.E1,&s1regs,false);

    // Get s2 into ES:DI
    tym_t ty2 = params.EV.E2.Ety;
    regm_t s2regs = tyreg(ty2) ? mDI : (mDI | mES);
    scodelem(cdb,params.EV.E2,&s2regs,s1regs,false);
    freenode(params);

    // Get nbytes into CX
    regm_t cntregs = mCX;
    scodelem(cdb,e.EV.E2,&cntregs,s2regs | s1regs,false);

    // ES must hold the segment of s2
    cdb.append(cod2_setES(ty2));

    // Work out how DS has to be set up for s1
    bool swapDS = true;         // DS is saved, reloaded, and restored afterwards
    bool segInDX = false;       // new DS value is taken from DX
    int srcseg;                 // otherwise: segment register copied into DS
    switch (tybasic(ty1))
    {
        case TYnptr:
        case TYimmutPtr:
            swapDS = false;
            break;

        case TYsptr:
            // if sptr can't use DS segment, take SS
            srcseg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            break;

        case TYcptr:
            srcseg = SEG_CS;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            segInDX = true;
            break;

        default:
            assert(0);
    }
    if (swapDS)
    {
        cdb.gen1(0x1E);                                 // PUSH DS
        if (segInDX)
            cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));       // MOV DS,DX
        else
        {
            cdb.gen1(0x06 + (srcseg << 3));             // PUSH segreg
            cdb.gen1(0x1F);                             // POP DS
        }
    }

    // AX is always zeroed with an XOR marked CFpsw ("keep flags").
    // A disabled alternative instead used regwithvalue(cdb,mAX,0,null,0)
    // to put 0 in AX, and only when *pretregs != mPSW.
    getregs(cdb,mAX);
    cdb.gen2(0x33,modregrm(3,AX,AX));               // XOR AX,AX
    code_orflag(cdb.last(), CFpsw);                 // keep flags

    getregs(cdb,mCX | mSI | mDI);
    cdb.gen1(0xF3);                                 // REPE
    cdb.gen1(0xA6);                                 // CMPSB
    if (swapDS)
        cdb.gen1(0x1F);                             // POP DS
    if (*pretregs != mPSW)                          // if not flags only
    {
        code *done = gennop(null);                  // label L1 of the sequence above
        genjmp(cdb,JE,FLcode,cast(block *) done);   // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                    // SBB AX,AX
        cdb.genc2(0x81,modregrm(3,3,AX),cast(targ_uns)-1);   // SBB AX,-1
        cdb.append(done);
    }

    *pretregs &= ~mPSW;
    fixresult(cdb,e,mAX,pretregs);
}

/*********************************
 * Generate code for strcpy(s1,s2) intrinsic.
*/

/* cdstrcpy reads e.E1 as the destination s1 and e.E2 as the source s2.
 * It first scans s2 with REPNE SCASB so that CX ends up holding
 * strlen(s2) + 1, rewinds DI by that amount to recover the start of s2
 * in SI, and then copies with REP MOVSB.  When a result is wanted, AX
 * receives the destination offset (DI) before the copy advances it.
 * stackchanged is set unconditionally on entry.
 */
void cdstrcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    char need_DS;       // true if DS and a source segment were pushed
    int segreg;         // segment register that holds the source segment

    /*
        LES     DI,s2           ;ES:DI = s2
        CLR     AX              ;scan for 0
        MOV     CX,-1           ;largest possible string
        REPNE   SCASB           ;find end of s2
        NOT     CX              ;CX = strlen(s2) + 1 (for EOS)
        SUB     DI,CX
        MOV     SI,DI
        PUSH    DS
        PUSH    ES
        LES     DI,s1
        POP     DS
        MOV     AX,DI           ;return value is s1
        REP     MOVSB
        POP     DS
    */

    stackchanged = 1;
    regm_t retregs = mDI;
    tym_t ty2 = tybasic(e.EV.E2.Ety);
    if (!tyreg(ty2))
        retregs |= mES;
    ubyte rex = I64 ? REX_W : 0;
    codelem(cdb,e.EV.E2,&retregs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));
    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                 // MOV AL,0
    movregconst(cdb,CX,-1,I64?64:0);         // MOV CX,-1
    getregs(cdb,mAX|mCX|mSI|mDI);
    cdb.gen1(0xF2);                          // REPNE
    cdb.gen1(0xAE);                          // SCASB
    genregs(cdb,0xF7,2,CX);                  // NOT CX
    code_orrex(cdb.last(),rex);
    genregs(cdb,0x2B,DI,CX);                 // SUB DI,CX
    code_orrex(cdb.last(),rex);
    genmovreg(cdb,SI,DI);                    // MOV SI,DI

    // Load DS with right value
    switch (ty2)
    {
        case TYnptr:
        case TYimmutPtr:
            need_DS = false;
            break;

        case TYsptr:
            if (config.wflags & WFssneds)       // if sptr can't use DS segment
                segreg = SEG_SS;
            else
                segreg = SEG_DS;
            goto L1;
        case TYcptr:
            segreg = SEG_CS;
        L1:
            // Two words go on the stack here; both are popped into DS further
            // down: the first pop loads the source segment, the second restores DS.
            cdb.gen1(0x1E);                     // PUSH DS
            cdb.gen1(0x06 + (segreg << 3));     // PUSH segreg
            cdb.genadjesp(REGSIZE * 2);
            need_DS = true;
            break;
        case TYfptr:
        case TYvptr:
        case TYhptr:
            segreg = SEG_ES;                    // source segment was loaded into ES above
            goto L1;

        default:
            assert(0);
    }

    // Get s1 into ES:DI while preserving the length (CX) and source offset (SI)
    retregs = mDI;
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    if (!tyreg(ty1))
        retregs |= mES;
    scodelem(cdb,e.EV.E1,&retregs,mCX|mSI,false);
    getregs(cdb,mAX|mCX|mSI|mDI);

    // Make sure ES contains proper segment value
    if (ty2 != TYnptr || ty1 != ty2)
        cdb.append(cod2_setES(ty1));
    else
    {}                              // ES is already same as DS

    if (need_DS)
        cdb.gen1(0x1F);                     // POP DS  (DS = source segment)
    if (*pretregs)
        genmovreg(cdb,AX,DI);               // MOV AX,DI
    cdb.gen1(0xF3);                         // REP
    cdb.gen1(0xA4);                         // MOVSB

    if (need_DS)
    {   cdb.gen1(0x1F);                     // POP DS  (restore original DS)
        cdb.genadjesp(-(REGSIZE * 2));
    }
    fixresult(cdb,e,mAX | mES,pretregs);
}

/*********************************
 * Generate code for memcpy(s1,s2,n) intrinsic.
 *  OPmemcpy
 *   /   \
 * s1   OPparam
 *       /   \
 *      s2    n
 *
 * s2 is loaded into [DX:]SI, n into CX and s1 into [ES:]DI, and the copy
 * is a REP MOVSB.  When a result is wanted, AX receives the destination
 * offset (DI) before the copy advances it.
 *
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = the OPmemcpy elem; its OPparam child is freed here
 *      pretregs = registers wanted for the result (0 if the value is unused)
 */

void cdmemcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    char need_DS;       // true if DS was pushed and must be popped after the copy
    int segreg;         // segment register to copy into DS (TYsptr/TYcptr cases only)

    /*
        MOV     SI,s2
        MOV     DX,s2+2
        MOV     CX,n
        LES     DI,s1
        PUSH    DS
        MOV     DS,DX
        MOV     AX,DI                   ;return value is s1
        REP     MOVSB
        POP     DS
    */

    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);

    // Get s2 into DX:SI
    regm_t retregs2 = mSI;
    tym_t ty2 = e2.EV.E1.Ety;
    if (!tyreg(ty2))
        retregs2 |= mDX;
    codelem(cdb,e2.EV.E1,&retregs2,false);

    // Need to check if nbytes is 0 (OPconst of 0 would have been removed by elmemcpy())
    const zeroCheck = e2.EV.E2.Eoper != OPconst;

    // Get nbytes into CX
    regm_t retregs3 = mCX;
    scodelem(cdb,e2.EV.E2,&retregs3,retregs2,false);
    freenode(e2);

    // Get s1 into ES:DI
    regm_t retregs1 = mDI;
    tym_t ty1 = e.EV.E1.Ety;
    if (!tyreg(ty1))
        retregs1 |= mES;
    scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);

    ubyte rex = I64 ? REX_W : 0;

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty1));

    // Load DS with right value
    switch (tybasic(ty2))
    {
        case TYnptr:
        case TYimmutPtr:
            need_DS = false;
            break;

        case TYsptr:
            if (config.wflags & WFssneds)       // if sptr can't use DS segment
                segreg = SEG_SS;
            else
                segreg = SEG_DS;
            goto L1;

        case TYcptr:
            segreg = SEG_CS;
        L1:
            // Save DS, then copy segreg into DS by way of the stack
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen1(0x06 + (segreg << 3));         // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
            need_DS = true;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));   // MOV DS,DX
            need_DS = true;
            break;

        default:
            assert(0);
    }

    if (*pretregs)                              // if need return value
    {   getregs(cdb,mAX);
        genmovreg(cdb,AX,DI);
    }

    // The leading 0 disables this branch; it is kept for reference only.
    if (0 && I32 && config.flags4 & CFG4speed)
    {
        /* This is only faster if the memory is dword aligned, if not
         * it is significantly slower than just a rep movsb.
         */
        /*      mov     EDX,ECX
         *      shr     ECX,2
         *      jz      L1
         *      repe    movsd
         * L1:  nop
         *      and     EDX,3
         *      jz      L2
         *      mov     ECX,EDX
         *      repe    movsb
         * L2:  nop
         */
        getregs(cdb,mSI | mDI | mCX | mDX);
        genmovreg(cdb,DX,CX);                  // MOV EDX,ECX
        cdb.genc2(0xC1,modregrm(3,5,CX),2);    // SHR ECX,2
        code *cx = gennop(null);
        genjmp(cdb, JE, FLcode, cast(block *)cx);  // JZ L1
        cdb.gen1(0xF3);                        // REPE
        cdb.gen1(0xA5);                        // MOVSD (0xA5 with 32-bit operand size)
        cdb.append(cx);
        cdb.genc2(0x81, modregrm(3,4,DX),3);   // AND EDX,3

        code *cnop = gennop(null);
        genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ L2
        genmovreg(cdb,CX,DX);                  // MOV ECX,EDX
        cdb.gen1(0xF3);                        // REPE
        cdb.gen1(0xA4);                        // MOVSB
        cdb.append(cnop);
    }
    else
    {
        code* cnop;                             // only assigned/used when zeroCheck is set
        if (zeroCheck)
        {
            // Skip the copy entirely when the run-time count is 0
            cnop = gennop(null);
            gentstreg(cdb,CX);                 // TEST ECX,ECX
            if (I64)
                code_orrex(cdb.last, REX_W);
            genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ cnop
        }

        getregs(cdb,mSI | mDI | mCX);
        if (I16 && config.flags4 & CFG4speed)          // if speed optimization
        {
            // Note this doesn't work if CX is 0
            cdb.gen2(0xD1,(rex << 16) | modregrm(3,5,CX));        // SHR CX,1
            cdb.gen1(0xF3);                              // REPE
            cdb.gen1(0xA5);                              // MOVSW
            cdb.gen2(0x11,(rex << 16) | modregrm(3,CX,CX));            // ADC CX,CX
        }
        cdb.gen1(0xF3);                             // REPE
        cdb.gen1(0xA4);                             // MOVSB
        if (zeroCheck)
            cdb.append(cnop);
        if (need_DS)
            cdb.gen1(0x1F);                         // POP DS
    }
    fixresult(cdb,e,mES|mAX,pretregs);
}


/*********************************
 * Generate code for memset(s,value,numbytes) intrinsic.
*      (s OPmemset (numbytes OPparam value))
 *
 * Three code shapes are produced:
 *  1. numbytes and value both constant, numbytes <= REP_THRESHOLD and not
 *     16 bit code: a short sequence of MOV stores through the pointer.
 *  2. numbytes constant otherwise: REP STOS for the whole words followed by
 *     individual STOSD/STOSB for the remainder.
 *  3. numbytes not constant: REP STOSB for 16 bit code (optionally preceded
 *     by SHR/REP STOSW/ADC), else SHR + REP STOS + REP STOSB.
 * In shapes 2 and 3 DI is advanced by the stores, so when a result is
 * wanted the original pointer is first copied to BX and returned from there.
 *
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = the OPmemset elem
 *      pretregs = registers wanted for the result (0 if the value is unused)
 */

void cdmemset(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs1;
    regm_t retregs3;
    reg_t reg;
    reg_t vreg;
    tym_t ty1;
    int segreg;
    targ_uns numbytes;
    uint m;

    //printf("cdmemset(*pretregs = %s)\n", regm_str(*pretregs));
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);

    elem* evalue = e2.EV.E2;
    elem* enumbytes = e2.EV.E1;

    const grex = I64 ? (REX_W << 16) : 0;

    // Replicate a constant fill byte across a full register-sized value
    bool valueIsConst = false;
    targ_size_t value;
    if (evalue.Eoper == OPconst)
    {
        value = el_tolong(evalue) & 0xFF;
        value |= value << 8;
        if (I32 || I64)
        {
            value |= value << 16;
            static if (value.sizeof == 8)
            if (I64)
                value |= value << 32;
        }
        valueIsConst = true;
    }
    else if (evalue.Eoper == OPstrpar) // happens if evalue is a struct of 0 size
    {
        value = 0;
        valueIsConst = true;
    }
    else
        value = 0xDEADBEEF;     // stop annoying false positives that value is not inited

    if (enumbytes.Eoper == OPconst)
    {
        static uint REP_THRESHOLD() { return REGSIZE * (6 + (REGSIZE == 4)); }
        numbytes = cast(uint)cast(targ_size_t)el_tolong(enumbytes);
        if (numbytes <= REP_THRESHOLD &&
            !I16 &&                     // doesn't work for 16 bits
            valueIsConst)
        {
            targ_uns offset = 0;
            retregs1 = *pretregs;
            if (!retregs1)
                retregs1 = ALLREGS;
            codelem(cdb,e.EV.E1,&retregs1,false);
            reg = findreg(retregs1);
            if (evalue.Eoper == OPconst)
            {
                // Sizes 4, 2 and 1 are a single store of an immediate.
                // NOTE(review): these cases jump to fixres without reaching the
                // freenode(evalue)/freenode(e2) calls below - confirm that is intended.
                const uint mrm = buildModregrm(0,0,reg);
                switch (numbytes)
                {
                    case 4:                      // MOV [reg],imm32
                        cdb.genc2(0xC7,mrm,value);
                        goto fixres;
                    case 2:                      // MOV [reg],imm16
                        cdb.genc2(0xC7,mrm,value);
                        cdb.last().Iflags = CFopsize;
                        goto fixres;
                    case 1:                      // MOV [reg],imm8
                        cdb.genc2(0xC6,mrm,value);
                        goto fixres;

                    default:
                        break;
                }
            }

            // Fill value goes in a byte-addressable register distinct from the pointer
            regwithvalue(cdb, BYTEREGS & ~retregs1, value, &vreg, I64 ? 64 : 0);
            freenode(evalue);
            freenode(e2);

            m = grex | buildModregrm(2,vreg,reg);
            while (numbytes >= REGSIZE)
            {                           // MOV dword ptr offset[reg],vreg
                cdb.gen2(0x89,m);
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
                numbytes -= REGSIZE;
                offset += REGSIZE;
            }
            m &= ~grex;                 // remaining stores are narrower than 64 bits
            if (numbytes & 4)
            {                           // MOV dword ptr offset[reg],vreg
                cdb.gen2(0x89,m);
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
                offset += 4;
            }
            if (numbytes & 2)
            {                           // MOV word ptr offset[reg],vreg
                cdb.gen2(0x89,m);
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
                cdb.last().Iflags = CFopsize;
                offset += 2;
            }
            if (numbytes & 1)
            {                           // MOV byte ptr offset[reg],vreg
                cdb.gen2(0x88,m);
                cdb.last().IEV1.Voffset = offset;
                cdb.last().IFL1 = FLconst;
                if (I64 && vreg >= 4)
                    cdb.last().Irex |= REX;
            }
fixres:
            fixresult(cdb,e,retregs1,pretregs);
            return;
        }
    }

    // Get nbytes into CX
    regm_t retregs2 = 0;
    if (enumbytes.Eoper != OPconst)
    {
        retregs2 = mCX;
        codelem(cdb,enumbytes,&retregs2,false);
    }

    // Get value into AX
    retregs3 = mAX;
    if (valueIsConst)
    {
        regwithvalue(cdb, mAX, value, null, I64?64:0);
        freenode(evalue);
    }
    else
    {
        scodelem(cdb,evalue,&retregs3,retregs2,false);

        // Replicate the run-time fill byte in AL across the whole register
        getregs(cdb,mAX);
        if (I16)
        {
            cdb.gen2(0x8A,modregrm(3,AH,AL)); // MOV AH,AL
        }
        else if (I32)
        {
            genregs(cdb,MOVZXb,AX,AX);                    // MOVZX EAX,AL
            cdb.genc2(0x69,modregrm(3,AX,AX),0x01010101); // IMUL EAX,EAX,0x01010101
        }
        else
        {
            genregs(cdb,MOVZXb,AX,AX);                    // MOVZX EAX,AL
            regm_t regm = allregs & ~(mAX | retregs2);
            reg_t r;
            regwithvalue(cdb,regm,cast(targ_size_t)0x01010101_01010101,&r,64); // MOV reg,0x01010101_01010101
            cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r));     // IMUL RAX,reg
        }
    }
    freenode(e2);

    // Get s into ES:DI
    retregs1 = mDI;
    ty1 = e.EV.E1.Ety;
    if (!tyreg(ty1))
        retregs1 |= mES;
    scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);
    reg = DI; //findreg(retregs1);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty1));

    if (*pretregs)                              // if need return value
    {
        getregs(cdb,mBX);
        genmovreg(cdb,BX,DI);                   // MOV EBX,EDI
    }


    if (enumbytes.Eoper == OPconst)
    {
        getregs(cdb,mDI);
        if (const numwords = numbytes / REGSIZE)
        {
            regwithvalue(cdb,mCX,numwords,null, I64 ? 64 : 0);
            getregs(cdb,mCX);
            cdb.gen1(0xF3);                     // REP
            cdb.gen1(STOS);                     // STOSW/D/Q
            if (I64)
                code_orrex(cdb.last(), REX_W);
            regimmed_set(CX, 0);                // CX is now 0
        }

        auto remainder = numbytes & (REGSIZE - 1);
        if (I64 && remainder >= 4)
        {
            cdb.gen1(STOS);                     // STOSD
            remainder -= 4;
        }
        for (; remainder; --remainder)
            cdb.gen1(STOSB);                    // STOSB
        fixresult(cdb,e,mES|mBX,pretregs);
        return;
    }

    getregs(cdb,mDI | mCX);
    if (I16)
    {
        if (config.flags4 & CFG4speed)      // if speed optimization
        {
            cdb.gen2(0xD1,modregrm(3,5,CX));  // SHR CX,1
            cdb.gen1(0xF3);                   // REP
            cdb.gen1(STOS);                   // STOSW
            cdb.gen2(0x11,modregrm(3,CX,CX)); // ADC CX,CX
        }
        cdb.gen1(0xF3);                       // REP
        cdb.gen1(STOSB);                      // STOSB
        regimmed_set(CX, 0);                  // CX is now 0
        fixresult(cdb,e,mES|mBX,pretregs);
        return;
    }

    /*  MOV   sreg,ECX
        SHR   ECX,n
        REP
        STOSD/Q

        ADC   ECX,ECX
        REP
        STOSD

        MOV   ECX,sreg
        AND   ECX,3
        REP
        STOSB
     */
    // Scratch register for the saved count; must not collide with AX (value),
    // CX (count), DI (pointer) or, when a result is wanted, BX (saved pointer).
    regm_t regs = allregs & (*pretregs ? ~(mAX|mBX|mCX|mDI) : ~(mAX|mCX|mDI));
    reg_t sreg;
    allocreg(cdb,&regs,&sreg,TYint);
    genregs(cdb,0x89,CX,sreg);                        // MOV sreg,ECX (32 bits only)

    const n = I64 ? 3 : 2;
    cdb.genc2(0xC1, grex | modregrm(3,5,CX), n);      // SHR ECX,n

    cdb.gen1(0xF3);                                   // REP
    cdb.gen1(STOS);                                   // STOSD/Q
    if (I64)
        code_orrex(cdb.last(), REX_W);

    if (I64)
    {
        // Carry holds the bit shifted out last (bit 2 of the count): store one more dword if set
        cdb.gen2(0x11,modregrm(3,CX,CX));             // ADC ECX,ECX
        cdb.gen1(0xF3);                               // REP
        cdb.gen1(STOS);                               // STOSD
    }

    genregs(cdb,0x89,sreg,CX);                        // MOV ECX,sreg (32 bits only)
    cdb.genc2(0x81, modregrm(3,4,CX), 3);             // AND ECX,3
    cdb.gen1(0xF3);                                   // REP
    cdb.gen1(STOSB);                                  // STOSB

    regimmed_set(CX, 0);                    // CX is now 0
    fixresult(cdb,e,mES|mBX,pretregs);
}


/**********************
 * Do structure assignments.
 * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2).
 * Mebbe call cdstreq() for double assignments???
 *
 * Copies type_size(e.ET) bytes from the object designated by e.E2 to the
 * one designated by e.E1 using MOVS instructions, with the source address
 * in [CX:]SI and the destination in [ES:]DI.  If a result is wanted, DI is
 * rewound by the structure size afterwards and returned.
 *
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = the structure assignment elem
 *      pretregs = registers wanted for the result; must not include mPSW
 */

void cdstreq(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    char need_DS = false;               // true if DS must be loaded from CX around the copy
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    int segreg;
    uint numbytes = cast(uint)type_size(e.ET);          // # of bytes in structure/union
    ubyte rex = I64 ? REX_W : 0;

    //printf("cdstreq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));

    // First, load pointer to rvalue into SI
    regm_t srcregs = mSI;                       // source is DS:SI
    docommas(cdb,&e2);
    if (e2.Eoper == OPind)               // if (.. = *p)
    {   elem *e21 = e2.EV.E1;

        segreg = SEG_DS;
        switch (tybasic(e21.Ety))
        {
            case TYsptr:
                if (config.wflags & WFssneds)   // if sptr can't use DS segment
                    segreg = SEG_SS;
                break;
            case TYcptr:
                if (!(config.exe & EX_flat))
                    segreg = SEG_CS;
                break;
            case TYfptr:
            case TYvptr:
            case TYhptr:
                srcregs |= mCX;         // get segment also
                need_DS = true;
                break;

            default:
                break;
        }
        codelem(cdb,e21,&srcregs,false);
        freenode(e2);
        if (segreg != SEG_DS)           // if not DS
        {
            getregs(cdb,mCX);
            cdb.gen2(0x8C,modregrm(3,segreg,CX)); // MOV CX,segreg
            need_DS = true;
        }
    }
    else if (e2.Eoper == OPvar)
    {
        if (e2.EV.Vsym.ty() & mTYfar) // if e2 is in a far segment
        {   srcregs |= mCX;             // get segment also
            need_DS = true;
            cdrelconst(cdb,e2,&srcregs);
        }
        else
        {
            segreg = segfl[el_fl(e2)];
            if ((config.wflags & WFssneds) && segreg == SEG_SS || // if source is on stack
                segreg == SEG_CS)               // if source is in CS
            {
                need_DS = true;         // we need to reload DS
                // Load CX with segment
                srcregs |= mCX;
                getregs(cdb,mCX);
                cdb.gen2(0x8C,                // MOV CX,[SS|CS]
                    modregrm(3,segreg,CX));
            }
            cdrelconst(cdb,e2,&srcregs);
        }
        freenode(e2);
    }
    else
    {
        if (!(config.exe & EX_flat))
        {   need_DS = true;
            srcregs |= mCX;
        }
        codelem(cdb,e2,&srcregs,false);
    }

    // now get pointer to lvalue (destination) in ES:DI
    regm_t dstregs = (config.exe & EX_flat) ? mDI : mES|mDI;
    if (e1.Eoper == OPind)               // if (*p = ..)
    {
        if (tyreg(e1.EV.E1.Ety))
            dstregs = mDI;
        cdb.append(cod2_setES(e1.EV.E1.Ety));
        scodelem(cdb,e1.EV.E1,&dstregs,srcregs,false);
    }
    else
        cdrelconst(cdb,e1,&dstregs);
    freenode(e1);

    getregs(cdb,(srcregs | dstregs) & (mLSW | mDI));
    if (need_DS)
    {   assert(!(config.exe & EX_flat));
        cdb.gen1(0x1E);                     // PUSH DS
        cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));    // MOV DS,CX
    }
    if (numbytes <= REGSIZE * (6 + (REGSIZE == 4)))
    {
        // Small structure: unrolled MOVS instructions, no REP prefix
        while (numbytes >= REGSIZE)
        {
            cdb.gen1(0xA5);         // MOVSW
            code_orrex(cdb.last(), rex);
            numbytes -= REGSIZE;
        }
        //if (numbytes)
        //    printf("cdstreq numbytes %d\n",numbytes);
        if (I64 && numbytes >= 4)
        {
            cdb.gen1(0xA5);         // MOVSD
            numbytes -= 4;
        }
        while (numbytes--)
            cdb.gen1(0xA4);         // MOVSB
    }
    else
    {
static if (1)
{
        // Large structure: REP MOVS for the whole words, then the leftover bytes
        uint remainder = numbytes & (REGSIZE - 1);
        numbytes /= REGSIZE;            // number of words
        getregs_imm(cdb,mCX);
        movregconst(cdb,CX,numbytes,0);   // # of bytes/words
        cdb.gen1(0xF3);                 // REP
        if (REGSIZE == 8)
            cdb.gen1(REX | REX_W);
        cdb.gen1(0xA5);                 // REP MOVSD
        regimmed_set(CX,0);             // note that CX == 0
        if (I64 && remainder >= 4)
        {
            cdb.gen1(0xA5);         // MOVSD
            remainder -= 4;
        }
        for (; remainder; remainder--)
        {
            cdb.gen1(0xA4);             // MOVSB
        }
}
else
{
        uint movs;
        if (numbytes & (REGSIZE - 1))   // if odd
            movs = 0xA4;                // MOVSB
        else
        {
            movs = 0xA5;                // MOVSW
            numbytes /= REGSIZE;        // # of words
        }
        getregs_imm(cdb,mCX);
        movregconst(cdb,CX,numbytes,0);   // # of bytes/words
        cdb.gen1(0xF3);                 // REP
        cdb.gen1(movs);
        regimmed_set(CX,0);             // note that CX == 0
}
    }
    if (need_DS)
        cdb.gen1(0x1F);                 // POP DS
    assert(!(*pretregs & mPSW));
    if (*pretregs)
    {   // ES:DI points past what we want

        // numbytes was consumed above, so the size is taken from the type again
        cdb.genc2(0x81,(rex << 16) | modregrm(3,5,DI), type_size(e.ET));   // SUB DI,numbytes
        regm_t retregs = mDI;
        if (*pretregs & mMSW && !(config.exe & EX_flat))
            retregs |= mES;
        fixresult(cdb,e,retregs,pretregs);
    }
}


/**********************
 * Get the address of.
 * Is also called by cdstreq() to set up pointer to a structure.
 *
 * If flags are requested (mPSW), a TEST SP,SP is emitted to set them and
 * mPSW is removed from *pretregs; if nothing else is wanted the function
 * returns there.  Otherwise registers are allocated from *pretregs, the
 * segment part is loaded when the pointer type is wider than REGSIZE, and
 * the offset part is loaded by getoffset().
 *
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = elem whose address is taken (must not be null)
 *      pretregs = in: acceptable result registers/flags; updated by allocreg()
 */

void cdrelconst(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdrelconst(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));

    /* The following should not happen, but cgelem.c is a little stupid.
     * Assertion can be tripped by func("string" == 0); and similar
     * things. Need to add goals to optelem() to fix this completely.
     */
    //assert((*pretregs & mPSW) == 0);
    if (*pretregs & mPSW)
    {
        *pretregs &= ~mPSW;
        gentstreg(cdb,SP);            // SP is never 0
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
    if (!*pretregs)
        return;

    assert(e);
    // Reduce the elem's type to the pointer type the address will have
    tym_t tym = tybasic(e.Ety);
    switch (tym)
    {
        case TYstruct:
        case TYarray:
        case TYldouble:
        case TYildouble:
        case TYcldouble:
            tym = TYnptr;               // don't confuse allocreg()
            if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
            {
                tym = TYfptr;
            }
            break;

        case TYifunc:
            tym = TYfptr;
            break;

        default:
            if (tyfunc(tym))
                tym =
                    tyfarfunc(tym) ? TYfptr :
                    TYnptr;
            break;
    }
    //assert(tym & typtr);              // don't fail on (int)&a

    SC sclass;
    reg_t mreg,            // segment of the address (TYfptrs only)
          lreg;            // offset of the address

    allocreg(cdb,pretregs,&lreg,tym);
    if (_tysize[tym] > REGSIZE)            // fptr could've been cast to long
    {
        if (*pretregs & mES)
        {
            /* Do not allocate CX or SI here, as cdstreq() needs
             * them preserved. cdstreq() should use scodelem()
             */
            mreg = allocScratchReg(cdb, (mAX|mBX|mDX|mDI) & ~mask(lreg));
        }
        else
        {
            mreg = lreg;
            lreg = findreglsw(*pretregs);
        }

        /* if (get segment of function that isn't necessarily in the
         * current segment (i.e. CS doesn't have the right value in it)
         */
        Symbol *s = e.EV.Vsym;
        if (s.Sfl == FLdatseg)
        {   assert(0);
        }
        sclass = cast(SC) s.Sclass;
        const ety = tybasic(s.ty());
        if ((tyfarfunc(ety) || ety == TYifunc) &&
            (sclass == SCextern || ClassInline(sclass) || config.wflags & WFthunk)
            || s.Sfl == FLfardata
            || (s.ty() & mTYcs && s.Sseg != cseg && (LARGECODE || s.Sclass == SCcomdat))
           )
        {   // MOV mreg,seg of symbol
            cdb.gencs(0xB8 + mreg,0,FLextern,s);
            cdb.last().Iflags = CFseg;
        }
        else
        {
            // Segment is whatever segment register covers the symbol's storage class
            const fl = (s.ty() & mTYcs) ? FLcsdata : s.Sfl;
            cdb.gen2(0x8C,            // MOV mreg,SEG REGISTER
                modregrm(3,segfl[fl],mreg));
        }
        if (*pretregs & mES)
            cdb.gen2(0x8E,modregrm(3,0,mreg));        // MOV ES,mreg
    }
    getoffset(cdb,e,lreg);
}

/*********************************
 * Load the offset portion of the address represented by e into
 * reg.
*/

/* Notes on getoffset():
 *  - e must be an OPvar or OPrelconst (asserted below).
 *  - reg may be STACK, in which case the offset is pushed instead of being
 *    left in a register, and stackchanged is set.
 *  - Symbol-relative storage classes produce a MOV/PUSH of an immediate
 *    with a fixup (or a RIP-relative LEA for 64 bit PIC / Win64);
 *    frame-relative ones (FLauto, FLpara, ...) produce an LEA through
 *    loadea() and set reflocal or refparam.
 *  - Thread-local symbols use the target-specific sequences under L5.
 */
void getoffset(ref CodeBuilder cdb,elem *e,reg_t reg)
{
    //printf("getoffset(e = %p, reg = %d)\n", e, reg);
    code cs = void;
    cs.Iflags = 0;
    ubyte rex = 0;
    cs.Irex = rex;
    assert(e.Eoper == OPvar || e.Eoper == OPrelconst);
    auto fl = el_fl(e);
    switch (fl)
    {
        case FLdatseg:
            cs.IEV2.Vpointer = e.EV.Vpointer;
            goto L3;

        case FLfardata:
            goto L4;

        case FLtlsdata:
        static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
        {
          {
            L5:
            if (config.flags3 & CFG3pic)
            {
                if (I64)
                {
                    /* Generate:
                     *   LEA DI,s@TLSGD[RIP]
                     */
                    //assert(reg == DI);
                    code css = void;
                    css.Irex = REX | REX_W;
                    css.Iop = LEA;
                    // Only the low 3 bits of reg belong in ModRM; bit 3 is carried
                    // by REX_R below (same split as the RIP-relative LEA under L3).
                    css.Irm = modregrm(0,reg & 7,5);
                    if (reg & 8)
                        css.Irex |= REX_R;
                    css.Iflags = CFopsize;
                    css.IFL1 = cast(ubyte)fl;
                    css.IEV1.Vsym = e.EV.Vsym;
                    css.IEV1.Voffset = e.EV.Voffset;
                    cdb.gen(&css);
                }
                else
                {
                    /* Generate:
                     *   LEA EAX,s@TLSGD[1*EBX+0]
                     */
                    assert(reg == AX);
                    load_localgot(cdb);
                    code css = void;
                    css.Iflags = 0;
                    css.Iop = LEA;             // LEA
                    css.Irex = 0;
                    css.Irm = modregrm(0,AX,4);
                    css.Isib = modregrm(0,BX,5);
                    css.IFL1 = cast(ubyte)fl;
                    css.IEV1.Vsym = e.EV.Vsym;
                    css.IEV1.Voffset = e.EV.Voffset;
                    cdb.gen(&css);
                }
                return;
            }
            /* Generate:
             *      MOV reg,GS:[00000000]
             *      ADD reg, offset s@TLS_LE
             * for locals, and for globals:
             *      MOV reg,GS:[00000000]
             *      ADD reg, s@TLS_IE
             * note different fixup
             */
            int stack = 0;
            if (reg == STACK)
            {   regm_t retregs = ALLREGS;

                // Compute into a temporary register, push it at the end
                reg_t regx;
                allocreg(cdb,&retregs,&regx,TYoffset);
                reg = findreg(retregs);
                stack = 1;
            }

            code css = void;
            css.Irex = rex;
            css.Iop = 0x8B;
            css.Irm = modregrm(0, 0, BPRM);
            code_newreg(&css, reg);
            css.Iflags = CFgs;
            css.IFL1 = FLconst;
            css.IEV1.Vuns = 0;
            cdb.gen(&css);               // MOV reg,GS:[00000000]

            if (e.EV.Vsym.Sclass == SCstatic || e.EV.Vsym.Sclass == SClocstat)
            {   // ADD reg, offset s
                cs.Irex = rex;
                cs.Iop = 0x81;
                cs.Irm = modregrm(3,0,reg & 7);
                if (reg & 8)
                    cs.Irex |= REX_B;
                cs.Iflags = CFoff;
                cs.IFL2 = cast(ubyte)fl;
                cs.IEV2.Vsym = e.EV.Vsym;
                cs.IEV2.Voffset = e.EV.Voffset;
            }
            else
            {   // ADD reg, s
                cs.Irex = rex;
                cs.Iop = 0x03;
                cs.Irm = modregrm(0,0,BPRM);
                code_newreg(&cs, reg);
                cs.Iflags = CFoff;
                cs.IFL1 = cast(ubyte)fl;
                cs.IEV1.Vsym = e.EV.Vsym;
                cs.IEV1.Voffset = e.EV.Voffset;
            }
            cdb.gen(&cs);                // ADD reg, xxxx

            if (stack)
            {
                cdb.gen1(0x50 + (reg & 7));      // PUSH reg
                if (reg & 8)
                    code_orrex(cdb.last(), REX_B);
                cdb.genadjesp(REGSIZE);
                stackchanged = 1;
            }
            break;
          }
        }
        else static if (TARGET_WINDOS)
        {
            if (I64)
            {
            L5:
                assert(reg != STACK);
                cs.IEV2.Vsym = e.EV.Vsym;
                cs.IEV2.Voffset = e.EV.Voffset;
                cs.Iop = 0xB8 + (reg & 7);      // MOV Ereg,offset s
                if (reg & 8)
                    cs.Irex |= REX_B;
                cs.Iflags = CFoff;              // want offset only
                cs.IFL2 = cast(ubyte)fl;
                cdb.gen(&cs);
                break;
            }
            goto L4;
        }
        else
        {
            goto L4;
        }

        case FLfunc:
            fl = FLextern;                  /* don't want PC relative addresses */
            goto L4;

        case FLextern:
            static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
            {
                if (e.EV.Vsym.ty() & mTYthread)
                    goto L5;
            }
            static if (TARGET_WINDOS)
            {
                if (I64 && e.EV.Vsym.ty() & mTYthread)
                    goto L5;
            }
            goto L4;

        case FLdata:
        case FLudata:
        case FLgot:
        case FLgotoff:
        case FLcsdata:
        L4:
            cs.IEV2.Vsym = e.EV.Vsym;
            cs.IEV2.Voffset = e.EV.Voffset;
        L3:
            if (reg == STACK)
            {   stackchanged = 1;
                cs.Iop = 0x68;              /* PUSH immed16                 */
                cdb.genadjesp(REGSIZE);
            }
            else
            {   cs.Iop = 0xB8 + (reg & 7);  // MOV reg,immed16
                if (reg & 8)
                    cs.Irex |= REX_B;
                if (I64)
                {   cs.Irex |= REX_W;
                    if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
                    {   // LEA reg,immed32[RIP]
                        cs.Iop = LEA;
                        cs.Irm = modregrm(0,reg & 7,5);
                        if (reg & 8)
                            // reg is now in the ModRM reg field, so the extension bit moves from B to R
                            cs.Irex = (cs.Irex & ~REX_B) | REX_R;
                        cs.IFL1 = cast(ubyte)fl;
                        cs.IEV1.Vsym = cs.IEV2.Vsym;
                        cs.IEV1.Voffset = cs.IEV2.Voffset;
                    }
                }
            }
            cs.Iflags = CFoff;              /* want offset only             */
            cs.IFL2 = cast(ubyte)fl;
            cdb.gen(&cs);
            break;

        case FLreg:
            /* Allow this since the tree optimizer puts & in front of   */
            /* register doubles.                                        */
            goto L2;
        case FLauto:
        case FLfast:
        case FLbprel:
        case FLfltreg:
            reflocal = true;
            goto L2;
        case FLpara:
            refparam = true;
        L2:
            if (reg == STACK)
            {   regm_t retregs = ALLREGS;

                // Compute the address into a temporary register, then push it
                reg_t regx;
                allocreg(cdb,&retregs,&regx,TYoffset);
                reg = findreg(retregs);
                loadea(cdb,e,&cs,LEA,reg,0,0,0);    // LEA reg,EA
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                cdb.gen1(0x50 + (reg & 7));               // PUSH reg
                if (reg & 8)
                    code_orrex(cdb.last(), REX_B);
                cdb.genadjesp(REGSIZE);
                stackchanged = 1;
            }
            else
            {
                loadea(cdb,e,&cs,LEA,reg,0,0,0);   // LEA reg,EA
                if (I64)
                    code_orrex(cdb.last(), REX_W);
            }
            break;

        default:
            debug
            {
                elem_print(e);
                WRFL(fl);
            }
            assert(0);
    }
}


/******************
 * OPneg, OPsqrt, OPsin, OPcos, OPrint
 *
 * Floating point operands are dispatched to neg_complex87(), xmmneg() or
 * neg87() when applicable; otherwise the sign bit of the value held in
 * integer registers is flipped with XOR.  Integer operands use NEG, with a
 * NEG/NEG/SBB sequence for values occupying a register pair.
 *
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = the unary elem; its operand is e.E1
 *      pretregs = registers/flags wanted for the result; if 0 the operand
 *                 is only evaluated for its side effects
 */

void cdneg(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdneg()\n");
    //elem_print(e);
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    if (tyfloating(tyml))
    {
        if (tycomplex(tyml))
        {
            neg_complex87(cdb, e, pretregs);
            return;
        }
        if (tyxmmreg(tyml) && e.Eoper == OPneg && *pretregs & XMMREGS)
        {
            xmmneg(cdb,e,pretregs);
            return;
        }
        if (config.inline8087 &&
            ((*pretregs & (ALLREGS | mBP)) == 0 || e.Eoper == OPsqrt || I64))
        {
            neg87(cdb,e,pretregs);
            return;
        }
        // Value lives in integer registers: flip the sign bit of its top word
        regm_t retregs = (I16 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,retregs);
        if (I32)
        {
            const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
            cdb.genc2(0x81,modregrm(3,6,reg),0x80000000); // XOR EDX,sign bit
        }
        else
        {
            const reg = (sz == 8) ? AX : findregmsw(retregs);
            cdb.genc2(0x81,modregrm(3,6,reg),0x8000);     // XOR AX,0x8000
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    const uint isbyte = sz == 1;
    const possregs = (isbyte) ? BYTEREGS : allregs;
    regm_t retregs = *pretregs & possregs;
    if (retregs == 0)
        retregs = possregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                // retregs will be destroyed
    if (sz <= REGSIZE)
    {
        const reg = findreg(retregs);
        uint rex = (I64 && sz == 8) ? REX_W : 0;
        if (I64 && sz == 1 && reg >= 4)
            rex |= REX;
        cdb.gen2(0xF7 ^ isbyte,(rex << 16) | modregrmx(3,3,reg));   // NEG reg
        if (!I16 && _tysize[tyml] == SHORTSIZE && *pretregs & mPSW)
            cdb.last().Iflags |= CFopsize | CFpsw;
        *pretregs &= mBP | ALLREGS;             // flags already set
    }
    else if (sz == 2 * REGSIZE)
    {
        const msreg = findregmsw(retregs);
        cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
        const lsreg = findreglsw(retregs);
        cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg
        code_orflag(cdb.last(), CFpsw);           // need flag result of previous NEG
        cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}


/******************
 * Absolute value operator
 *
 * Floating point operands are dispatched to xmmabs() or neg87() when
 * applicable; otherwise the sign bit of the value held in integer
 * registers is cleared with AND.  Integer operands up to REGSIZE use the
 * branch-free sign-mask sequence (XOR reg,r / SUB reg,r with r holding the
 * replicated sign); register-pair operands test the sign and conditionally
 * negate.
 *
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = the abs elem; its operand is e.E1
 *      pretregs = registers/flags wanted for the result; if 0 the operand
 *                 is only evaluated for its side effects
 */


void cdabs(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdabs(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    if (*pretregs == 0)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    const rex = (I64 && sz == 8) ? REX_W : 0;
    if (tyfloating(tyml))
    {
        if (tyxmmreg(tyml) && *pretregs & XMMREGS)
        {
            xmmabs(cdb,e,pretregs);
            return;
        }
        if (config.inline8087 && ((*pretregs & (ALLREGS | mBP)) == 0 || I64))
        {
            neg87(cdb,e,pretregs);
            return;
        }
        // Value lives in integer registers: clear the sign bit of its top word
        regm_t retregs = (!I32 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,retregs);
        if (I32)
        {
            const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
            cdb.genc2(0x81,modregrm(3,4,reg),0x7FFFFFFF); // AND EDX,~sign bit
        }
        else
        {
            const reg = (sz == 8) ? AX : findregmsw(retregs);
            cdb.genc2(0x81,modregrm(3,4,reg),0x7FFF);     // AND AX,0x7FFF
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    const uint isbyte = sz == 1;
    assert(isbyte == 0);
    // The CWD form below needs the operand in AX; the MOV/SAR form can use any register
    regm_t possregs = (sz <= REGSIZE) ? cast(regm_t) mAX : allregs;
    if (!I16 && sz == REGSIZE)
        possregs = allregs;
    regm_t retregs = *pretregs & possregs;
    if (retregs == 0)
        retregs = possregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                // retregs will be destroyed
    if (sz <= REGSIZE)
    {
        /*      CWD
                XOR     AX,DX
                SUB     AX,DX
           or:
                MOV     r,reg
                SAR     r,63
                XOR     reg,r
                SUB     reg,r
         */
        reg_t reg;
        reg_t r;

        if (!I16 && sz == REGSIZE)
        {
            reg = findreg(retregs);
            r = allocScratchReg(cdb, allregs & ~retregs);
            getregs(cdb,retregs);
            genmovreg(cdb,r,reg);                     // MOV r,reg
            cdb.genc2(0xC1,modregrmx(3,7,r),REGSIZE * 8 - 1);      // SAR r,31/63
            code_orrex(cdb.last(), rex);
        }
        else
        {
            reg = AX;
            r = DX;
            getregs(cdb,mDX);
            if (!I16 && sz == SHORTSIZE)
                cdb.gen1(0x98);                         // CWDE
            cdb.gen1(0x99);                             // CWD
            code_orrex(cdb.last(), rex);
        }
        cdb.gen2(0x33 ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // XOR reg,r
        cdb.gen2(0x2B ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // SUB reg,r
        if (!I16 && sz == SHORTSIZE && *pretregs & mPSW)
            cdb.last().Iflags |= CFopsize | CFpsw;
        if (*pretregs & mPSW)
            cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;                     // flags already set
    }
    else if (sz == 2 * REGSIZE)
    {
        /*      or      DX,DX
                jns     L2
                neg     DX
                neg     AX
                sbb     DX,0
            L2:
         */

        code *cnop = gennop(null);
        const msreg = findregmsw(retregs);
        const lsreg = findreglsw(retregs);
        genregs(cdb,0x09,msreg,msreg);            // OR msreg,msreg
        genjmp(cdb,JNS,FLcode,cast(block *)cnop);
        cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
        // NOTE(review): cdneg() marks the equivalent instruction with CFpsw because
        // the following SBB consumes its carry; confirm whether that is needed here too.
        cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg
        cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
        cdb.append(cnop);
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}

/**************************
 * Post increment and post decrement.
 */

void cdpost(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpost(pretregs = %s)\n", regm_str(*pretregs));
    code cs = void;
    const op = e.Eoper;                      // OPxxxx
    if (*pretregs == 0)                        // if nothing to return
    {
        cdaddass(cdb,e,pretregs);
        return;
    }
    const tym_t tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    elem *e2 = e.EV.E2;
    const rex = (I64 && sz == 8) ? REX_W : 0;

    if (tyfloating(tyml))
    {
        if (config.fpxmmregs && tyxmmreg(tyml) &&
            !tycomplex(tyml) // SIMD code is not set up to deal with complex
           )
        {
            xmmpost(cdb,e,pretregs);
            return;
        }

        if (config.inline8087)
        {
            post87(cdb,e,pretregs);
            return;
        }
static if (TARGET_WINDOS)
{
        assert(sz <= 8);
        getlvalue(cdb,&cs,e.EV.E1,DOUBLEREGS);
        freenode(e.EV.E1);
        regm_t idxregs = idxregm(&cs);  // mask of index regs used
        cs.Iop = 0x8B;                  /* MOV DOUBLEREGS,EA            */
        fltregs(cdb,&cs,tyml);
        stackchanged = 1;
        int stackpushsave = stackpush;
        regm_t retregs;
        if (sz == 8)
        {
            if (I32)
            {
                cdb.gen1(0x50 + DX);             // PUSH DOUBLEREGS
                cdb.gen1(0x50 + AX);
                stackpush += DOUBLESIZE;
                retregs = DOUBLEREGS2_32;
            }
            else
            {
                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                stackpush += DOUBLESIZE + DOUBLESIZE;

                cdb.gen1(0x50 + AX);
                cdb.gen1(0x50 + BX);
                cdb.gen1(0x50 + CX);
                cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                retregs = DOUBLEREGS_16;
            }
        }
        else
        {
            stackpush += FLOATSIZE;     /*
so we know something is on */ 5154 if (!I32) 5155 cdb.gen1(0x50 + DX); 5156 cdb.gen1(0x50 + AX); 5157 retregs = FLOATREGS2; 5158 } 5159 cdb.genadjesp(stackpush - stackpushsave); 5160 5161 cgstate.stackclean++; 5162 scodelem(cdb,e2,&retregs,idxregs,false); 5163 cgstate.stackclean--; 5164 5165 if (tyml == TYdouble || tyml == TYdouble_alias) 5166 { 5167 retregs = DOUBLEREGS; 5168 callclib(cdb,e,(op == OPpostinc) ? CLIB.dadd : CLIB.dsub, 5169 &retregs,idxregs); 5170 } 5171 else /* tyml == TYfloat */ 5172 { 5173 retregs = FLOATREGS; 5174 callclib(cdb,e,(op == OPpostinc) ? CLIB.fadd : CLIB.fsub, 5175 &retregs,idxregs); 5176 } 5177 cs.Iop = 0x89; /* MOV EA,DOUBLEREGS */ 5178 fltregs(cdb,&cs,tyml); 5179 stackpushsave = stackpush; 5180 if (tyml == TYdouble || tyml == TYdouble_alias) 5181 { if (*pretregs == mSTACK) 5182 retregs = mSTACK; /* leave result on stack */ 5183 else 5184 { 5185 if (I32) 5186 { 5187 cdb.gen1(0x58 + AX); 5188 cdb.gen1(0x58 + DX); 5189 } 5190 else 5191 { 5192 cdb.gen1(0x58 + DX); 5193 cdb.gen1(0x58 + CX); 5194 cdb.gen1(0x58 + BX); 5195 cdb.gen1(0x58 + AX); 5196 } 5197 stackpush -= DOUBLESIZE; 5198 retregs = DOUBLEREGS; 5199 } 5200 } 5201 else 5202 { 5203 cdb.gen1(0x58 + AX); 5204 if (!I32) 5205 cdb.gen1(0x58 + DX); 5206 stackpush -= FLOATSIZE; 5207 retregs = FLOATREGS; 5208 } 5209 cdb.genadjesp(stackpush - stackpushsave); 5210 fixresult(cdb,e,retregs,pretregs); 5211 return; 5212 } 5213 } 5214 if (tyxmmreg(tyml)) 5215 { 5216 xmmpost(cdb,e,pretregs); 5217 return; 5218 } 5219 5220 assert(e2.Eoper == OPconst); 5221 uint isbyte = (sz == 1); 5222 regm_t possregs = isbyte ? 
BYTEREGS : allregs; 5223 getlvalue(cdb,&cs,e.EV.E1,0); 5224 freenode(e.EV.E1); 5225 regm_t idxregs = idxregm(&cs); // mask of index regs used 5226 if (sz <= REGSIZE && *pretregs == mPSW && (cs.Irm & 0xC0) == 0xC0 && 5227 (!I16 || (idxregs & (mBX | mSI | mDI | mBP)))) 5228 { 5229 // Generate: 5230 // TEST reg,reg 5231 // LEA reg,n[reg] // don't affect flags 5232 reg_t reg = cs.Irm & 7; 5233 if (cs.Irex & REX_B) 5234 reg |= 8; 5235 cs.Iop = 0x85 ^ isbyte; 5236 code_newreg(&cs, reg); 5237 cs.Iflags |= CFpsw; 5238 cdb.gen(&cs); // TEST reg,reg 5239 5240 // If lvalue is a register variable, we must mark it as modified 5241 modEA(cdb,&cs); 5242 5243 auto n = e2.EV.Vint; 5244 if (op == OPpostdec) 5245 n = -n; 5246 int rm = reg; 5247 if (I16) 5248 { 5249 static immutable byte[8] regtorm = [ -1,-1,-1, 7,-1, 6, 4, 5 ]; // copied from cod1.c 5250 rm = regtorm[reg]; 5251 } 5252 cdb.genc1(LEA,(rex << 16) | buildModregrm(2,reg,rm),FLconst,n); // LEA reg,n[reg] 5253 return; 5254 } 5255 else if (sz <= REGSIZE || tyfv(tyml)) 5256 { 5257 code cs2 = void; 5258 5259 cs.Iop = 0x8B ^ isbyte; 5260 regm_t retregs = possregs & ~idxregs & *pretregs; 5261 if (!tyfv(tyml)) 5262 { 5263 if (retregs == 0) 5264 retregs = possregs & ~idxregs; 5265 } 5266 else /* tyfv(tyml) */ 5267 { 5268 if ((retregs &= mLSW) == 0) 5269 retregs = mLSW & ~idxregs; 5270 /* Can't use LES if the EA uses ES as a seg override */ 5271 if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes) 5272 { cs.Iop = 0xC4; /* LES */ 5273 getregs(cdb,mES); // allocate ES 5274 } 5275 } 5276 reg_t reg; 5277 allocreg(cdb,&retregs,®,TYint); 5278 code_newreg(&cs, reg); 5279 if (sz == 1 && I64 && reg >= 4) 5280 cs.Irex |= REX; 5281 cdb.gen(&cs); // MOV reg,EA 5282 cs2 = cs; 5283 5284 /* If lvalue is a register variable, we must mark it as modified */ 5285 modEA(cdb,&cs); 5286 5287 cs.Iop = 0x81 ^ isbyte; 5288 cs.Irm &= ~cast(int)modregrm(0,7,0); // reg field = 0 5289 cs.Irex &= ~REX_R; 5290 if (op == OPpostdec) 5291 cs.Irm |= modregrm(0,5,0); 
/* SUB */ 5292 cs.IFL2 = FLconst; 5293 targ_int n = e2.EV.Vint; 5294 cs.IEV2.Vint = n; 5295 if (n == 1) /* can use INC or DEC */ 5296 { 5297 cs.Iop |= 0xFE; /* xFE is dec byte, xFF is word */ 5298 if (op == OPpostdec) 5299 NEWREG(cs.Irm,1); // DEC EA 5300 else 5301 NEWREG(cs.Irm,0); // INC EA 5302 } 5303 else if (n == -1) // can use INC or DEC 5304 { 5305 cs.Iop |= 0xFE; // xFE is dec byte, xFF is word 5306 if (op == OPpostinc) 5307 NEWREG(cs.Irm,1); // DEC EA 5308 else 5309 NEWREG(cs.Irm,0); // INC EA 5310 } 5311 5312 // For scheduling purposes, we wish to replace: 5313 // MOV reg,EA 5314 // OP EA 5315 // with: 5316 // MOV reg,EA 5317 // OP reg 5318 // MOV EA,reg 5319 // ~OP reg 5320 if (sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 && 5321 config.target_cpu >= TARGET_Pentium && 5322 config.flags4 & CFG4speed) 5323 { 5324 // Replace EA in cs with reg 5325 cs.Irm = (cs.Irm & ~cast(int)modregrm(3,0,7)) | modregrm(3,0,reg & 7); 5326 if (reg & 8) 5327 { cs.Irex &= ~REX_R; 5328 cs.Irex |= REX_B; 5329 } 5330 else 5331 cs.Irex &= ~REX_B; 5332 if (I64 && sz == 1 && reg >= 4) 5333 cs.Irex |= REX; 5334 cdb.gen(&cs); // ADD/SUB reg,const 5335 5336 // Reverse MOV direction 5337 cs2.Iop ^= 2; 5338 cdb.gen(&cs2); // MOV EA,reg 5339 5340 // Toggle INC <. DEC, ADD <. SUB 5341 cs.Irm ^= (n == 1 || n == -1) ? modregrm(0,1,0) : modregrm(0,5,0); 5342 cdb.gen(&cs); 5343 5344 if (*pretregs & mPSW) 5345 { *pretregs &= ~mPSW; // flags already set 5346 code_orflag(cdb.last(),CFpsw); 5347 } 5348 } 5349 else 5350 cdb.gen(&cs); // ADD/SUB EA,const 5351 5352 freenode(e2); 5353 if (tyfv(tyml)) 5354 { 5355 reg_t preg; 5356 5357 getlvalue_msw(&cs); 5358 if (*pretregs & mES) 5359 { 5360 preg = ES; 5361 /* ES is already loaded if CFes is 0 */ 5362 cs.Iop = ((cs.Iflags & CFSEG) == CFes) ? 
0x8E : NOP; 5363 NEWREG(cs.Irm,0); /* MOV ES,EA+2 */ 5364 } 5365 else 5366 { 5367 regm_t retregsx = *pretregs & mMSW; 5368 if (!retregsx) 5369 retregsx = mMSW; 5370 allocreg(cdb,&retregsx,&preg,TYint); 5371 cs.Iop = 0x8B; 5372 if (I32) 5373 cs.Iflags |= CFopsize; 5374 NEWREG(cs.Irm,preg); /* MOV preg,EA+2 */ 5375 } 5376 getregs(cdb,mask(preg)); 5377 cdb.gen(&cs); 5378 retregs = mask(reg) | mask(preg); 5379 } 5380 fixresult(cdb,e,retregs,pretregs); 5381 return; 5382 } 5383 else if (tyml == TYhptr) 5384 { 5385 uint rvalue; 5386 reg_t lreg; 5387 reg_t rtmp; 5388 regm_t mtmp; 5389 5390 rvalue = e2.EV.Vlong; 5391 freenode(e2); 5392 5393 // If h--, convert to h++ 5394 if (e.Eoper == OPpostdec) 5395 rvalue = -rvalue; 5396 5397 regm_t retregs = mLSW & ~idxregs & *pretregs; 5398 if (!retregs) 5399 retregs = mLSW & ~idxregs; 5400 allocreg(cdb,&retregs,&lreg,TYint); 5401 5402 // Can't use LES if the EA uses ES as a seg override 5403 if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes) 5404 { cs.Iop = 0xC4; 5405 retregs |= mES; 5406 getregs(cdb,mES|mCX); // allocate ES 5407 cs.Irm |= modregrm(0,lreg,0); 5408 cdb.gen(&cs); // LES lreg,EA 5409 } 5410 else 5411 { cs.Iop = 0x8B; 5412 retregs |= mDX; 5413 getregs(cdb,mDX|mCX); 5414 cs.Irm |= modregrm(0,lreg,0); 5415 cdb.gen(&cs); // MOV lreg,EA 5416 NEWREG(cs.Irm,DX); 5417 getlvalue_msw(&cs); 5418 cdb.gen(&cs); // MOV DX,EA+2 5419 getlvalue_lsw(&cs); 5420 } 5421 5422 // Allocate temporary register, rtmp 5423 mtmp = ALLREGS & ~mCX & ~idxregs & ~retregs; 5424 allocreg(cdb,&mtmp,&rtmp,TYint); 5425 5426 movregconst(cdb,rtmp,rvalue >> 16,0); // MOV rtmp,e2+2 5427 getregs(cdb,mtmp); 5428 cs.Iop = 0x81; 5429 NEWREG(cs.Irm,0); 5430 cs.IFL2 = FLconst; 5431 cs.IEV2.Vint = rvalue; 5432 cdb.gen(&cs); // ADD EA,e2 5433 code_orflag(cdb.last(),CFpsw); 5434 cdb.genc2(0x81,modregrm(3,2,rtmp),0); // ADC rtmp,0 5435 genshift(cdb); // MOV CX,offset __AHSHIFT 5436 cdb.gen2(0xD3,modregrm(3,4,rtmp)); // SHL rtmp,CL 5437 cs.Iop = 0x01; 5438 
NEWREG(cs.Irm,rtmp); // ADD EA+2,rtmp 5439 getlvalue_msw(&cs); 5440 cdb.gen(&cs); 5441 fixresult(cdb,e,retregs,pretregs); 5442 return; 5443 } 5444 else if (sz == 2 * REGSIZE) 5445 { 5446 regm_t retregs = allregs & ~idxregs & *pretregs; 5447 if ((retregs & mLSW) == 0) 5448 retregs |= mLSW & ~idxregs; 5449 if ((retregs & mMSW) == 0) 5450 retregs |= ALLREGS & mMSW; 5451 assert(retregs & mMSW && retregs & mLSW); 5452 reg_t reg; 5453 allocreg(cdb,&retregs,®,tyml); 5454 uint sreg = findreglsw(retregs); 5455 cs.Iop = 0x8B; 5456 cs.Irm |= modregrm(0,sreg,0); 5457 cdb.gen(&cs); // MOV sreg,EA 5458 NEWREG(cs.Irm,reg); 5459 getlvalue_msw(&cs); 5460 cdb.gen(&cs); // MOV reg,EA+2 5461 cs.Iop = 0x81; 5462 cs.Irm &= ~cast(int)modregrm(0,7,0); /* reg field = 0 for ADD */ 5463 if (op == OPpostdec) 5464 cs.Irm |= modregrm(0,5,0); /* SUB */ 5465 getlvalue_lsw(&cs); 5466 cs.IFL2 = FLconst; 5467 cs.IEV2.Vlong = e2.EV.Vlong; 5468 cdb.gen(&cs); // ADD/SUB EA,const 5469 code_orflag(cdb.last(),CFpsw); 5470 getlvalue_msw(&cs); 5471 cs.IEV2.Vlong = 0; 5472 if (op == OPpostinc) 5473 cs.Irm ^= modregrm(0,2,0); /* ADC */ 5474 else 5475 cs.Irm ^= modregrm(0,6,0); /* SBB */ 5476 cs.IEV2.Vlong = cast(targ_long)(e2.EV.Vullong >> (REGSIZE * 8)); 5477 cdb.gen(&cs); // ADC/SBB EA,0 5478 freenode(e2); 5479 fixresult(cdb,e,retregs,pretregs); 5480 return; 5481 } 5482 else 5483 { 5484 assert(0); 5485 } 5486 } 5487 5488 5489 void cderr(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5490 { 5491 debug 5492 elem_print(e); 5493 5494 //printf("op = %d, %d\n", e.Eoper, OPstring); 5495 //printf("string = %p, len = %d\n", e.EV.ss.Vstring, e.EV.ss.Vstrlen); 5496 //printf("string = '%.*s'\n", cast(int)e.EV.ss.Vstrlen, e.EV.ss.Vstring); 5497 assert(0); 5498 } 5499 5500 void cdinfo(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5501 { 5502 switch (e.EV.E1.Eoper) 5503 { 5504 version (MARS) 5505 { 5506 case OPdctor: 5507 codelem(cdb,e.EV.E2,pretregs,false); 5508 regm_t retregs = 0; 5509 
codelem(cdb,e.EV.E1,&retregs,false); 5510 break; 5511 } 5512 version (SCPP) 5513 { 5514 case OPdtor: 5515 cdcomma(cdb,e,pretregs); 5516 break; 5517 case OPctor: 5518 codelem(cdb,e.EV.E2,pretregs,false); 5519 regm_t retregs = 0; 5520 codelem(cdb,e.EV.E1,&retregs,false); 5521 break; 5522 case OPmark: 5523 if (0 && config.exe == EX_WIN32) 5524 { 5525 const idx = except_index_get(); 5526 except_mark(); 5527 codelem(cdb,e.EV.E2,pretregs,false); 5528 if (config.exe == EX_WIN32 && idx != except_index_get()) 5529 { usednteh |= NTEHcleanup; 5530 nteh_gensindex(cdb,idx - 1); 5531 } 5532 except_release(); 5533 assert(idx == except_index_get()); 5534 } 5535 else 5536 { 5537 code cs = void; 5538 cs.Iop = ESCAPE | ESCmark; 5539 cs.Iflags = 0; 5540 cs.Irex = 0; 5541 cdb.gen(&cs); 5542 codelem(cdb,e.EV.E2,pretregs,false); 5543 cs.Iop = ESCAPE | ESCrelease; 5544 cdb.gen(&cs); 5545 } 5546 freenode(e.EV.E1); 5547 break; 5548 } 5549 default: 5550 assert(0); 5551 } 5552 } 5553 5554 /******************************************* 5555 * D constructor. 5556 */ 5557 5558 void cddctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 5559 { 5560 /* Generate: 5561 ESCAPE | ESCdctor 5562 MOV sindex[BP],index 5563 */ 5564 usednteh |= EHcleanup; 5565 if (config.ehmethod == EHmethod.EH_WIN32) 5566 { usednteh |= NTEHcleanup | NTEH_try; 5567 nteh_usevars(); 5568 } 5569 assert(*pretregs == 0); 5570 code cs; 5571 cs.Iop = ESCAPE | ESCdctor; // mark start of EH range 5572 cs.Iflags = 0; 5573 cs.Irex = 0; 5574 cs.IFL1 = FLctor; 5575 cs.IEV1.Vtor = e; 5576 cdb.gen(&cs); 5577 nteh_gensindex(cdb,0); // the actual index will be patched in later 5578 // by except_fillInEHTable() 5579 } 5580 5581 /******************************************* 5582 * D destructor. 
*/

/*******************************************
 * Generate code for a D destructor elem.
 *
 * With DWARF exception handling, emits the ESCAPE | ESCddtor marker and
 * then the destructor expression (e.EV.E1) inline.
 * Otherwise the destructor expression is emitted as a local subroutine
 * ending in RET, which is CALLed and then jumped around.
 * No result may be requested (*pretregs must be 0).
 * Params:
 *      cdb = code sink
 *      e = the destructor elem; e.EV.E1 is the code to run
 *      pretregs = must point to 0
 */

void cdddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    if (config.ehmethod == EHmethod.EH_DWARF)
    {
        usednteh |= EHcleanup;

        code cs;
        cs.Iop = ESCAPE | ESCddtor;     // mark end of EH range and where landing pad is
        cs.Iflags = 0;
        cs.Irex = 0;
        cs.IFL1 = FLdtor;
        cs.IEV1.Vtor = e;
        cdb.gen(&cs);

        // Mark all registers as destroyed
        getregsNoSave(allregs);

        assert(*pretregs == 0);
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    else
    {
        /* Generate:
            ESCAPE | ESCddtor
            MOV     sindex[BP],index
            CALL    dtor
            JMP     L1
        Ldtor:
            ... e.EV.E1 ...
            RET
        L1: NOP
        */
        usednteh |= EHcleanup;
        if (config.ehmethod == EHmethod.EH_WIN32)
        {   usednteh |= NTEHcleanup | NTEH_try;
            nteh_usevars();
        }

        code cs;
        cs.Iop = ESCAPE | ESCddtor;
        cs.Iflags = 0;
        cs.Irex = 0;
        cs.IFL1 = FLdtor;
        cs.IEV1.Vtor = e;
        cdb.gen(&cs);

        nteh_gensindex(cdb,0);              // the actual index will be patched in later
                                            // by except_fillInEHTable()

        // Mark all registers as destroyed
        getregsNoSave(allregs);

        assert(*pretregs == 0);

        // Build the destructor body separately so it can be placed after the JMP
        CodeBuilder cdbx;
        cdbx.ctor();
        codelem(cdbx,e.EV.E1,pretregs,false);
        cdbx.gen1(0xC3);                      // RET
        code *c = cdbx.finish();

        // Adjust the stack around the CALL when STACKALIGN >= 16
        int nalign = 0;
        if (STACKALIGN >= 16)
        {
            nalign = STACKALIGN - REGSIZE;
            cod3_stackadj(cdb, nalign);
        }
        calledafunc = 1;
        genjmp(cdb,0xE8,FLcode,cast(block *)c);   // CALL Ldtor
        if (nalign)
            cod3_stackadj(cdb, -nalign);

        code *cnop = gennop(null);

        genjmp(cdb,JMP,FLcode,cast(block *)cnop);
        cdb.append(cdbx);
        cdb.append(cnop);
        return;
    }
}


/*******************************************
 * C++ constructor.
*/

/*******************************************
 * Under version SCPP, emits the ESCAPE | ESCctor marker recording e as the
 * constructor elem; in other builds the body is empty.
 * No result may be requested (*pretregs must be 0).
 */

void cdctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
version (SCPP)
{
    usednteh |= EHcleanup;
    if (config.exe == EX_WIN32)
        usednteh |= NTEHcleanup;
    assert(*pretregs == 0);

    code cs = void;
    cs.Iop = ESCAPE | ESCctor;
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL1 = FLctor;
    cs.IEV1.Vtor = e;
    cdb.gen(&cs);
}
}

/*******************************************
 * C++ destructor.
 * Under version SCPP, emits the ESCAPE | ESCdtor marker recording e as the
 * destructor elem; in other builds the body is empty.
 * No result may be requested (*pretregs must be 0).
 */

void cddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
version (SCPP)
{
    usednteh |= EHcleanup;
    if (config.exe == EX_WIN32)
        usednteh |= NTEHcleanup;
    assert(*pretregs == 0);

    code cs = void;
    cs.Iop = ESCAPE | ESCdtor;
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL1 = FLdtor;
    cs.IEV1.Vtor = e;
    cdb.gen(&cs);
}
}

/*******************************************
 * Generates no code.
 */

void cdmark(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
}

static if (!NTEXCEPTIONS)
{
/*******************************************
 * Stand-in used when NTEXCEPTIONS is not enabled; must never be reached.
 */
void cdsetjmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(0);
}
}

/*****************************************
 * Evaluate e.EV.E1 with no result wanted.
 * *pretregs must be 0.
 */

void cdvoid(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);
    codelem(cdb,e.EV.E1,pretregs,false);
}

/*****************************************
 * Emit a single halt instruction: UD2 when the target CPU is at least
 * TARGET_80286, otherwise INT3.
 * *pretregs must be 0.
 */

void cdhalt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);
    cdb.gen1(config.target_cpu >= TARGET_80286 ? UD2 : INT3);
}

}