/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1984-1998 by Symantec
 *              Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod2.d, backend/cod2.d)
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod2.d
 */

module dmd.backend.cod2;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.oper;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

extern (C++):

nothrow:

int REGSIZE();

extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl;

__gshared int cdcmp_flag;

/// Returns: a mask with only bit number `m` set.
private extern (D) uint mask(uint m)
{
    return 1 << m;
}

// from divcoeff.c
extern (C)
{
    bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
    bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost);
}

/*******************************
 * Exchange the contents of two register variables.
 * Params:
 *      a = first register, receives the old value of `*b`
 *      b = second register, receives the old value of `*a`
 */

private void swap(reg_t *a,reg_t *b)
{
    const saved = *b;
    *b = *a;
    *a = saved;
}


/*******************************************
 * Params:
 *      e = elem whose effective address is being considered
 * Returns: true if cannot use this EA in anything other than a MOV instruction.
 */

bool movOnly(const elem *e)
{
    // Only variables in OSX 64 bit PIC code are restricted
    if (!((config.exe & EX_OSX64) && (config.flags3 & CFG3pic) && e.Eoper == OPvar))
        return false;

    // Fixups for these storage classes can only be done with a MOV
    const sc = e.EV.Vsym.Sclass;
    return sc == SCglobal || sc == SCextern ||
           sc == SCcomdat || sc == SCcomdef;
}

/********************************
 * Determine index registers used by addressing mode.
 * Index is rm of modregrm field.
 * Params:
 *      c = instruction whose Irm, Isib and Irex fields are examined
 * Returns:
 *      mask of index registers
 */

regm_t idxregm(const code* c)
{
    const modrm = c.Irm;

    // mod field of 3: the operand is a register, no addressing registers involved
    if ((modrm & 0xC0) == 0xC0)
        return 0;

    if (I16)
    {
        // 16 bit addressing: registers implied by each rm encoding
        static immutable ubyte[8] idxrm = [mBX|mSI,mBX|mDI,mSI,mDI,mSI,mDI,0,mBX];
        return idxrm[modrm & 7];
    }

    const rexB = (c.Irex & REX_B) ? 8 : 0;      // REX.B extends the rm/base register number

    if ((modrm & 7) != 4)                       // no sib byte: rm names the register directly
        return mask((modrm & 7) | rexB);

    // sib byte present
    const sib = c.Isib;
    const rexX = (c.Irex & REX_X) ? 8 : 0;      // REX.X extends the index register number
    regm_t result = mask(((sib >> 3) & 7) | rexX);  // scaled index reg

    // base field of 5 together with mod field of 0 contributes no base register
    if (!((sib & 7) == 5 && (modrm & 0xC0) == 0))
        result |= mask((sib & 7) | rexB);

    return result;
}


/***************************
 * Gen code for call to floating point routine.
*/

void opdouble(ref CodeBuilder cdb, elem *e,regm_t *pretregs,uint clib)
{
    // cdb      = code builder the generated instructions are appended to
    // e        = binary floating point operator elem
    // pretregs = desired result registers, handed on to orth87()/callclib()
    // clib     = CLIB index of the double precision routine; rebased to the
    //            float routine below when the left operand is TYfloat
    if (config.inline8087)
    {
        orth87(cdb,e,pretregs);
        return;
    }

    regm_t retregs1,retregs2;
    if (tybasic(e.EV.E1.Ety) == TYfloat)
    {
        clib += CLIB.fadd - CLIB.dadd;    /* convert to float operation   */
        retregs1 = FLOATREGS;
        retregs2 = FLOATREGS2;
    }
    else
    {
        if (I32)
        {   retregs1 = DOUBLEREGS_32;
            retregs2 = DOUBLEREGS2_32;
        }
        else
        {   retregs1 = mSTACK;
            retregs2 = DOUBLEREGS_16;
        }
    }

    codelem(cdb,e.EV.E1, &retregs1,false);
    // stackclean is raised while E2 is evaluated if E1 ended up on the stack
    if (retregs1 & mSTACK)
        cgstate.stackclean++;
    scodelem(cdb,e.EV.E2, &retregs2, retregs1 & ~mSTACK, false);
    if (retregs1 & mSTACK)
        cgstate.stackclean--;
    callclib(cdb, e, clib, pretregs, 0);
}

/*****************************
 * Handle operators which are more or less orthogonal
 * ( + - & | ^ )
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = binary operator elem; e.Eoper must be one of
 *          OPadd, OPmin, OPor, OPxor, OPand (anything else asserts)
 *      pretregs = mask of desired result registers; 0 means the result is
 *                 not wanted and both operands are evaluated only for effect
 */

void cdorth(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdorth(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false); // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    const ty = tybasic(e.Ety);
    const ty1 = tybasic(e1.Ety);

    // Floating point operands are dispatched to the XMM, x87 or library paths
    if (tyfloating(ty1))
    {
        if (tyvector(ty1) ||
            config.fpxmmregs && tyxmmreg(ty1) &&
            !(*pretregs & mST0) &&
            !(*pretregs & mST01) &&
            !(ty == TYldouble || ty == TYildouble)  // watch out for shrinkLongDoubleConstantIfPossible()
           )
        {
            orthxmm(cdb,e,pretregs);
            return;
        }
        if (config.inline8087)
        {
            orth87(cdb,e,pretregs);
            return;
        }
        if (config.exe & EX_windos)
        {
            opdouble(cdb,e,pretregs,(e.Eoper == OPadd) ? CLIB.dadd
                                                       : CLIB.dsub);
            return;
        }
        else
        {
            assert(0);
        }
    }
    if (tyxmmreg(ty1))
    {
        orthxmm(cdb,e,pretregs);
        return;
    }

    opcode_t op1, op2;          // opcode for the low word, and for the high word of 2-word operations
    uint mode;                  // reg field used with the 0x81 immediate-group opcode
    __gshared int nest;         // non-zero while inside the getlvalue() call of the LEA path below

    const ty2 = tybasic(e2.Ety);
    const e2oper = e2.Eoper;
    const sz = _tysize[ty];
    const isbyte = (sz == 1);
    code_flags_t word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
    bool test = false;                // assume we destroyed lvalue

    switch (e.Eoper)
    {
        case OPadd:     mode = 0;
                        op1 = 0x03; op2 = 0x13; break;  /* ADD, ADC     */
        case OPmin:     mode = 5;
                        op1 = 0x2B; op2 = 0x1B; break;  /* SUB, SBB     */
        case OPor:      mode = 1;
                        op1 = 0x0B; op2 = 0x0B; break;  /* OR , OR      */
        case OPxor:     mode = 6;
                        op1 = 0x33; op2 = 0x33; break;  /* XOR, XOR     */
        case OPand:     mode = 4;
                        op1 = 0x23; op2 = 0x23;         /* AND, AND     */
                        if (tyreg(ty1) &&
                            *pretregs == mPSW)          /* if flags only */
                        {
                            test = true;
                            op1 = 0x85;                 /* TEST         */
                            mode = 0;
                        }
                        break;

        default:
            assert(0);
    }
    op1 ^= isbyte;                                  /* if byte operation    */

    // Compute numwords, the number of words to operate on.
    int numwords = 1;
    if (!I16)
    {
        /* Cannot operate on longs and then do a 'paint' to a far       */
        /* pointer, because far pointers are 48 bits and longs are 32.  */
        /* Therefore, numwords can never be 2.                          */
        assert(!(tyfv(ty1) && tyfv(ty2)));
        if (sz == 2 * REGSIZE)
        {
            numwords++;
        }
    }
    else
    {
        /* If ty is a TYfptr, but both operands are long, treat the     */
        /* operation as a long.                                         */
        if ((tylong(ty1) || ty1 == TYhptr) &&
            (tylong(ty2) || ty2 == TYhptr))
            numwords++;
    }

    // Special cases where only flags are set
    if (test && _tysize[ty1] <= REGSIZE &&
        (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        && !movOnly(e1)
       )
    {
        // Handle the case of (var & const)
        if (e2.Eoper == OPconst && el_signx32(e2))
        {
            code cs = void;
            cs.Iflags = 0;
            cs.Irex = 0;
            getlvalue(cdb,&cs,e1,0);
            targ_size_t value = e2.EV.Vpointer;
            if (sz == 2)
                value &= 0xFFFF;
            else if (sz == 4)
                value &= 0xFFFFFFFF;
            reg_t reg;
            if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,value,&reg))
            {
                // a register already holds the constant: TEST EA,reg
                code_newreg(&cs, reg);
                if (I64 && isbyte && reg >= 4)
                    cs.Irex |= REX;
            }
            else
            {
                if (sz == 8 && !I64)
                {
                    assert(value == cast(int)value);    // sign extend imm32
                }
                op1 = 0xF7;                             // TEST EA,imm
                cs.IEV2.Vint = cast(targ_int)value;
                cs.IFL2 = FLconst;
            }
            // NOTE(review): op1 already had isbyte XORed in above, so unless it
            // was just reset to 0xF7 this second XOR cancels the first - confirm intended.
            cs.Iop = op1 ^ isbyte;
            cs.Iflags |= word | CFpsw;
            freenode(e1);
            freenode(e2);
            cdb.gen(&cs);
            return;
        }

        // Handle (exp & reg)
        reg_t reg;
        regm_t retregs;
        if (isregvar(e2,&retregs,&reg))
        {
            code cs = void;
            cs.Iflags = 0;
            cs.Irex = 0;
            getlvalue(cdb,&cs,e1,0);
            code_newreg(&cs, reg);
            if (I64 && isbyte && reg >= 4)
                cs.Irex |= REX;
            cs.Iop = op1 ^ isbyte;      // NOTE(review): same double XOR with isbyte as above
            cs.Iflags |= word | CFpsw;
            freenode(e1);
            freenode(e2);
            cdb.gen(&cs);
            return;
        }
    }

    code cs = void;
    cs.Iflags = 0;
    cs.Irex = 0;

    // Look for possible uses of LEA
    if (e.Eoper == OPadd &&
        !(*pretregs & mPSW) &&                // flags aren't set by LEA
        !nest &&                              // could cause infinite recursion if e.Ecount
        (sz == REGSIZE || (I64 && sz == 4)))  // far pointers aren't handled
    {
        const rex = (sz == 8) ? REX_W : 0;

        // Handle the case of (e + &var)
        int e1oper = e1.Eoper;
        if ((e2oper == OPrelconst && (config.target_cpu >= TARGET_Pentium || (!e2.Ecount && stackfl[el_fl(e2)])))
                || // LEA costs too much for simple EAs on older CPUs
            (e2oper == OPconst && (e1.Eoper == OPcall || e1.Eoper == OPcallns) && !(*pretregs & mAX)) ||
            (!I16 && (isscaledindex(e1) || isscaledindex(e2))) ||
            (!I16 && e1oper == OPvar && e1.EV.Vsym.Sfl == FLreg && (e2oper == OPconst || (e2oper == OPvar && e2.EV.Vsym.Sfl == FLreg))) ||
            (e2oper == OPconst && e1oper == OPeq && e1.EV.E1.Eoper == OPvar) ||
            (!I16 && (e2oper == OPrelconst || e2oper == OPconst) && !e1.Ecount &&
             (e1oper == OPmul || e1oper == OPshl) &&
             e1.EV.E2.Eoper == OPconst &&
             ssindex(e1oper,e1.EV.E2.EV.Vuns)
            ) ||
            (!I16 && e1.Ecount)
           )
        {
            // Let getlvalue() build the addressing mode for the whole
            // expression, then load that effective address with LEA.
            const inc = e.Ecount != 0;
            nest += inc;
            code csx = void;
            getlvalue(cdb,&csx,e,0);
            nest -= inc;
            reg_t regx;
            allocreg(cdb,pretregs,&regx,ty);
            csx.Iop = LEA;
            code_newreg(&csx, regx);
            cdb.gen(&csx);          // LEA regx,EA
            if (rex)
                code_orrex(cdb.last(), rex);
            return;
        }

        // Handle the case of ((e + c) + e2)
        if (!I16 &&
            e1oper == OPadd &&
            (e1.EV.E2.Eoper == OPconst && el_signx32(e1.EV.E2) ||
             e2oper == OPconst && el_signx32(e2)) &&
            !e1.Ecount
           )
        {
            elem *ebase;        // operand that supplies the base register
            elem *edisp;        // constant operand that supplies the displacement
            if (e2oper == OPconst && el_signx32(e2))
            {   edisp = e2;
                ebase = e1.EV.E2;
            }
            else
            {   edisp = e1.EV.E2;
                ebase = e2;
            }

            auto e11 = e1.EV.E1;
            regm_t retregs = *pretregs & ALLREGS;
            if (!retregs)
                retregs = ALLREGS;
            int ss = 0;         // scale field of the first LEA
            int ss2 = 0;        // if non-zero, scale field of the second LEA

            // Handle the case of (((e *  c1) + c2) + e2)
            // Handle the case of (((e << c1) + c2) + e2)
            if ((e11.Eoper == OPmul || e11.Eoper == OPshl) &&
                e11.EV.E2.Eoper == OPconst &&
                !e11.Ecount
               )
            {
                const co1 = cast(targ_size_t)el_tolong(e11.EV.E2);
                if (e11.Eoper == OPshl)
                {
                    if (co1 > 3)
                        goto L13;
                    ss = cast(int)co1;
                }
                else
                {
                    ss2 = 1;
                    switch (co1)
                    {
                        case  6:        ss = 1;                 break;
                        case 12:        ss = 1; ss2 = 2;        break;
                        case 24:        ss = 1; ss2 = 3;        break;
                        case 10:        ss = 2;                 break;
                        case 20:        ss = 2; ss2 = 2;        break;
                        case 40:        ss = 2; ss2 = 3;        break;
                        case 18:        ss = 3;                 break;
                        case 36:        ss = 3; ss2 = 2;        break;
                        case 72:        ss = 3; ss2 = 3;        break;
                        default:
                            ss2 = 0;
                            goto L13;
                    }
                }
                freenode(e11.EV.E2);
                freenode(e11);
                e11 = e11.EV.E1;
              L13:
                { }
            }

            reg_t reg11;
            regm_t regm;
            if (e11.Eoper == OPvar && isregvar(e11,&regm,&reg11))
            {
                if (tysize(e11.Ety) <= REGSIZE)
                    retregs = mask(reg11); // only want the LSW
                else
                    retregs = regm;
                freenode(e11);
            }
            else
                codelem(cdb,e11,&retregs,false);

            regm_t rretregs = ALLREGS & ~retregs & ~mBP;
            scodelem(cdb,ebase,&rretregs,retregs,true);
            reg_t reg;
            {
                regm_t sregs = *pretregs & ~rretregs;
                if (!sregs)
                    sregs = ALLREGS & ~rretregs;
                allocreg(cdb,&sregs,&reg,ty);
            }

            assert((retregs & (retregs - 1)) == 0); // must be only one register
            assert((rretregs & (rretregs - 1)) == 0); // must be only one register

            auto reg1 = findreg(retregs);
            const reg2 = findreg(rretregs);

            if (ss2)
            {
                assert(reg != reg2);
                if ((reg1 & 7) == BP)
                {   static immutable uint[4] imm32 = [1+1,2+1,4+1,8+1];

                    // IMUL reg,imm32
                    cdb.genc2(0x69,modregxrmx(3,reg,reg1),imm32[ss]);
                }
                else
                {   // LEA reg,[reg1*ss][reg1]
                    cdb.gen2sib(LEA,modregxrm(0,reg,4),modregrm(ss,reg1 & 7,reg1 & 7));
                    if (reg1 & 8)
                        code_orrex(cdb.last(), REX_X | REX_B);
                }
                if (rex)
                    code_orrex(cdb.last(), rex);
                reg1 = reg;
                ss = ss2;                               // use *2 for scale
            }

            cs.Iop = LEA;                      // LEA reg,c[reg1*ss][reg2]
            cs.Irm = modregrm(2,reg & 7,4);
            cs.Isib = modregrm(ss,reg1 & 7,reg2 & 7);
            assert(reg2 != BP);
            cs.Iflags = CFoff;
            cs.Irex = cast(ubyte)rex;
            if (reg & 8)
                cs.Irex |= REX_R;
            if (reg1 & 8)
                cs.Irex |= REX_X;
            if (reg2 & 8)
                cs.Irex |= REX_B;
            cs.IFL1 = FLconst;
            cs.IEV1.Vsize_t = edisp.EV.Vuns;

            freenode(edisp);
            freenode(e1);
            cdb.gen(&cs);
            fixresult(cdb,e,mask(reg),pretregs);
            return;
        }
    }

    regm_t posregs = (isbyte) ? BYTEREGS : (mES | ALLREGS | mBP);
    regm_t retregs = *pretregs & posregs;
    if (retregs == 0)                   /* if no return regs speced     */
                                        /* (like if wanted flags only)  */
        retregs = ALLREGS & posregs;    // give us some

    if (ty1 == TYhptr || ty2 == TYhptr)
    {     /* Generate code for add/subtract of huge pointers.
           No attempt is made to generate very good code.
         */
        retregs = (retregs & mLSW) | mDX;
        regm_t rretregs;
        if (ty1 == TYhptr)
        {   // hptr +- long
            rretregs = mLSW & ~(retregs | regcon.mvar);
            if (!rretregs)
                rretregs = mLSW;
            rretregs |= mCX;
            codelem(cdb,e1,&rretregs,0);
            retregs &= ~rretregs;
            if (!(retregs & mLSW))
                retregs |= mLSW & ~rretregs;

            scodelem(cdb,e2,&retregs,rretregs,true);
        }
        else
        {   // long + hptr
            codelem(cdb,e1,&retregs,0);
            rretregs = (mLSW | mCX) & ~retregs;
            if (!(rretregs & mLSW))
                rretregs |= mLSW;
            scodelem(cdb,e2,&rretregs,retregs,true);
        }
        getregs(cdb,rretregs | retregs);
        const mreg = DX;
        const lreg = findreglsw(retregs);
        if (e.Eoper == OPmin)
        {   // negate retregs
            cdb.gen2(0xF7,modregrm(3,3,mreg));     // NEG mreg
            cdb.gen2(0xF7,modregrm(3,3,lreg));     // NEG lreg
            code_orflag(cdb.last(),CFpsw);
            cdb.genc2(0x81,modregrm(3,3,mreg),0);  // SBB mreg,0
        }
        const lrreg = findreglsw(rretregs);
        genregs(cdb,0x03,lreg,lrreg);              // ADD lreg,lrreg
        code_orflag(cdb.last(),CFpsw);
        genmovreg(cdb,lrreg,CX);                   // MOV lrreg,CX
        cdb.genc2(0x81,modregrm(3,2,mreg),0);      // ADC mreg,0
        genshift(cdb);                             // MOV CX,offset __AHSHIFT
        cdb.gen2(0xD3,modregrm(3,4,mreg));         // SHL mreg,CL
        genregs(cdb,0x03,mreg,lrreg);              // ADD mreg,MSREG(h)
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    regm_t rretregs;
    reg_t reg;
    if (_tysize[ty1] > REGSIZE && numwords == 1)
    {   /* The only possibilities are (TYfptr + tyword) or (TYfptr - tyword) */

        debug
        if (_tysize[ty2] != REGSIZE)
        {
            printf("e = %p, e.Eoper = ",e);
            WROP(e.Eoper);
            printf(" e1.Ety = ");
            WRTYxx(ty1);
            printf(" e2.Ety = ");
            WRTYxx(ty2);
            printf("\n");
            elem_print(e);
        }

        assert(_tysize[ty2] == REGSIZE);

        /* Watch out for the case here where you are going to OP reg,EA */
        /* and both the reg and EA use ES! Prevent this by forcing      */
        /* reg into the regular registers.                              */
        if ((e2oper == OPind ||
            (e2oper == OPvar && el_fl(e2) == FLfardata)) &&
            !e2.Ecount)
        {
            retregs = ALLREGS;
        }

        codelem(cdb,e1,&retregs,test != 0);
        reg = findreglsw(retregs);      /* reg is the register with the offset*/
    }
    else
    {
        regm_t regm;

        /* if (tyword + TYfptr) */
        if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE)
        {   retregs = ~*pretregs & ALLREGS;

            /* if retregs doesn't have any regs in it that aren't reg vars */
            if ((retregs & ~regcon.mvar) == 0)
                retregs |= mAX;
        }
        else if (numwords == 2 && retregs & mES)
            retregs = (retregs | mMSW) & ALLREGS;

        // Determine if we should swap operands, because
        //      mov     EAX,x
        //      add     EAX,reg
        // is faster than:
        //      mov     EAX,reg
        //      add     EAX,x
        else if (e2oper == OPvar &&
                 e1.Eoper == OPvar &&
                 e.Eoper != OPmin &&
                 isregvar(e1,&regm,null) &&
                 regm != retregs &&
                 _tysize[ty1] == _tysize[ty2])
        {
            elem *es = e1;
            e1 = e2;
            e2 = es;
        }
        codelem(cdb,e1,&retregs,test != 0);         // eval left leaf
        reg = findreg(retregs);
    }
    reg_t rreg;
    int rval;
    targ_size_t i;
    // Dispatch on the kind of right operand (sampled before any operand swap above)
    switch (e2oper)
    {
        case OPind:                                 /* if addressing mode   */
            if (!e2.Ecount)                         /* if not CSE           */
                goto L1;                            /* try OP reg,EA        */
            goto default;

        default:                                    /* operator node        */
        L2:
            rretregs = ALLREGS & ~retregs;
            /* Be careful not to do arithmetic on ES        */
            if (_tysize[ty1] == REGSIZE && _tysize[ty2] > REGSIZE && *pretregs != mPSW)
                rretregs = *pretregs & (mES | ALLREGS | mBP) & ~retregs;
            else if (isbyte)
                rretregs &= BYTEREGS;

            scodelem(cdb,e2,&rretregs,retregs,true);       // get rvalue
            rreg = (_tysize[ty2] > REGSIZE) ? findreglsw(rretregs) : findreg(rretregs);
            if (!test)
                getregs(cdb,retregs);          // we will trash these regs
            if (numwords == 1)                              /* ADD reg,rreg */
            {
                /* reverse operands to avoid moving around the segment value */
                if (_tysize[ty2] > REGSIZE)
                {
                    getregs(cdb,rretregs);
                    genregs(cdb,op1,rreg,reg);
                    retregs = rretregs;     // reverse operands
                }
                else
                {
                    genregs(cdb,op1,reg,rreg);
                    if (!I16 && *pretregs & mPSW)
                        cdb.last().Iflags |= word;
                }
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
                if (I64 && isbyte && (reg >= 4 || rreg >= 4))
                    code_orrex(cdb.last(), REX);
            }
            else /* numwords == 2 */                /* ADD lsreg,lsrreg     */
            {
                reg = findreglsw(retregs);
                rreg = findreglsw(rretregs);
                genregs(cdb,op1,reg,rreg);
                if (e.Eoper == OPadd || e.Eoper == OPmin)
                    code_orflag(cdb.last(),CFpsw);
                reg = findregmsw(retregs);
                rreg = findregmsw(rretregs);
                if (!(e2oper == OPu16_32 && // if second operand is 0
                      (op2 == 0x0B || op2 == 0x33)) // and OR or XOR
                   )
                    genregs(cdb,op2,reg,rreg);        // ADC msreg,msrreg
            }
            break;

        case OPrelconst:
            if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
                goto default;
            if (sz != REGSIZE)
                goto L2;
            if (segfl[el_fl(e2)] != 3)              /* if not in data segment */
                goto L2;
            if (evalinregister(e2))
                goto L2;
            cs.IEV2.Voffset = e2.EV.Voffset;
            cs.IEV2.Vsym = e2.EV.Vsym;
            cs.Iflags |= CFoff;
            i = 0;                                  /* no INC or DEC opcode */
            rval = 0;
            goto L3;

        case OPconst:
            if (tyfv(ty2))
                goto L2;
            if (numwords == 1)
            {
                if (!el_signx32(e2))
                    goto L2;
                i = e2.EV.Vpointer;
                if (word)
                {
                    if (!(*pretregs & mPSW) &&
                        config.flags4 & CFG4speed &&
                        (e.Eoper == OPor || e.Eoper == OPxor || test ||
                         (e1.Eoper != OPvar && e1.Eoper != OPind)))
                    {   word = 0;
                        i &= 0xFFFF;
                    }
                }
                rval = reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,&rreg);
                cs.IEV2.Vsize_t = i;
            L3:
                if (!test)
                    getregs(cdb,retregs);          // we will trash these regs
                op1 ^= isbyte;      // undo the byte adjustment; reapplied via cs.Iop below
                cs.Iflags |= word;
                if (rval)
                {   cs.Iop = op1 ^ 2;
                    mode = rreg;
                }
                else
                    cs.Iop = 0x81;
                cs.Irm = modregrm(3,mode&7,reg&7);
                if (mode & 8)
                    cs.Irex |= REX_R;
                if (reg & 8)
                    cs.Irex |= REX_B;
                if (I64 && sz == 8)
                    cs.Irex |= REX_W;
                if (I64 && isbyte && (reg >= 4 || (rval && rreg >= 4)))
                    cs.Irex |= REX;
                cs.IFL2 = cast(ubyte)((e2.Eoper == OPconst) ? FLconst : el_fl(e2));
                /* Modify instruction for special cases */
                switch (e.Eoper)
                {
                    case OPadd:
                    {
                        int iop;

                        if (i == 1)
                            iop = 0;                    /* INC reg      */
                        else if (i == -1)
                            iop = 8;                    /* DEC reg      */
                        else
                            break;
                        cs.Iop = (0x40 | iop | reg) ^ isbyte;
                        if ((isbyte && *pretregs & mPSW) || I64)
                        {
                            cs.Irm = cast(ubyte)(modregrm(3,0,reg & 7) | iop);
                            cs.Iop = 0xFF;
                        }
                        break;
                    }

                    case OPand:
                        if (test)
                            cs.Iop = rval ? op1 : 0xF7; // TEST
                        break;

                    default:
                        break;
                }
                if (*pretregs & mPSW)
                    cs.Iflags |= CFpsw;
                cs.Iop ^= isbyte;
                cdb.gen(&cs);
                cs.Iflags &= ~CFpsw;
            }
            else if (numwords == 2)
            {
                getregs(cdb,retregs);
                reg = findregmsw(retregs);
                const lsreg = findreglsw(retregs);
                cs.Iop = 0x81;
                cs.Irm = modregrm(3,mode,lsreg);
                cs.IFL2 = FLconst;
                const msw = cast(targ_int)MSREG(e2.EV.Vllong);
                cs.IEV2.Vint = e2.EV.Vlong;
                switch (e.Eoper)
                {
                    case OPadd:
                    case OPmin:
                        cs.Iflags |= CFpsw;
                        break;

                    default:
                        break;
                }
                cdb.gen(&cs);
                cs.Iflags &= ~CFpsw;

                // Same instruction again on the most significant register
                cs.Irm = cast(ubyte)((cs.Irm & modregrm(3,7,0)) | reg);
                cs.IEV2.Vint = msw;
                if (e.Eoper == OPadd)
                    cs.Irm |= modregrm(0,2,0);      /* ADC          */
                cdb.gen(&cs);
            }
            else
                assert(0);
            freenode(e2);
            break;

        case OPvar:
            if (movOnly(e2))
                goto L2;
        L1:
            if (tyfv(ty2))
                goto L2;
            if (!test)
                getregs(cdb,retregs);          // we will trash these regs
            loadea(cdb,e2,&cs,op1,
                   ((numwords == 2) ? findreglsw(retregs) : reg),
                   0,retregs,retregs);
            if (!I16 && word)
            {   if (*pretregs & mPSW)
                    code_orflag(cdb.last(),word);
                else
                    cdb.last().Iflags &= ~cast(int)word;
            }
            else if (numwords == 2)
            {
                if (e.Eoper == OPadd || e.Eoper == OPmin)
                    code_orflag(cdb.last(),CFpsw);
                reg = findregmsw(retregs);
                if (!OTleaf(e2.Eoper))
                {   getlvalue_msw(&cs);
                    cs.Iop = op2;
                    NEWREG(cs.Irm,reg);
                    cdb.gen(&cs);                 // ADC reg,data+2
                }
                else
                    loadea(cdb,e2,&cs,op2,reg,REGSIZE,retregs,0);
            }
            else if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
            freenode(e2);
            break;
    }

    if (sz <= REGSIZE && *pretregs & mPSW)
    {
        /* If the expression is (_tls_array + ...), then the flags are not set
         * since the linker may rewrite these instructions into something else.
         */
        if (I64 && e.Eoper == OPadd && e1.Eoper == OPvar)
        {
            const s = e1.EV.Vsym;
            // length 10 includes the terminating 0, so the whole name must match
            if (s.Sident[0] == '_' && memcmp(s.Sident.ptr + 1,"tls_array".ptr,10) == 0)
            {
                goto L7;                        // don't assume flags are set
            }
        }
        code_orflag(cdb.last(),CFpsw);
        *pretregs &= ~mPSW;                    // flags already set
    L7: { }
    }
    fixresult(cdb,e,retregs,pretregs);
}


/*****************************
 * Handle multiply.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = multiply elem, operands in e.EV.E1 and e.EV.E2
 *      pretregs = mask of desired result registers; 0 means the result is
 *                 not wanted and both operands are evaluated only for effect
 */

void cdmul(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdmul()\n");
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    if (*pretregs == 0)                         // if don't want result
    {
        codelem(cdb,e1,pretregs,false);         // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    //printf("cdmul(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    const tyml = tybasic(e1.Ety);
    const ty = tybasic(e.Ety);
    const oper = e.Eoper;

    // Floating point operands are dispatched to the XMM, x87 or library paths
    if (tyfloating(tyml))
    {
        if (tyvector(tyml) ||
            config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) &&
            !(*pretregs & mST0) &&
            !(ty == TYldouble || ty == TYildouble) &&  // watch out for shrinkLongDoubleConstantIfPossible()
            !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div
            !(ty == TYllong)  //   or passing to function through integer register
           )
        {
            orthxmm(cdb,e,pretregs);
            return;
        }
        if (config.exe & EX_posix)
            orth87(cdb,e,pretregs);
        else
            opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv);

        return;
    }

    if (tyxmmreg(tyml))
    {
        orthxmm(cdb,e,pretregs);
        return;
    }

    const uns = tyuns(tyml) || tyuns(e2.Ety);  // 1 if unsigned operation, 0 if signed
    const isbyte = tybyte(e.Ety) != 0;
    const sz = _tysize[tyml];
    const ubyte rex = (I64 && sz == 8) ? REX_W : 0;
    const uint grex = rex << 16;        // rex positioned for OR-ing into the modregrm argument
    const OPER opunslng = I16 ? OPu16_32 : OPu32_64;

    code cs = void;
    cs.Iflags = 0;
    cs.Irex = 0;

    switch (e2.Eoper)
    {
        case OPu16_32:
        case OPs16_32:
        case OPu32_64:
        case OPs32_64:
        {
            // Both operands are the same widening conversion: multiply the
            // unconverted operands and take the double-width result in DX:AX
            if (sz != 2 * REGSIZE || e1.Eoper != e2.Eoper ||
                e1.Ecount || e2.Ecount)
                goto default;
            const ubyte opx = (e2.Eoper == opunslng) ? 4 : 5;   // /4 is MUL, /5 is IMUL
            regm_t retregsx = mAX;
            codelem(cdb,e1.EV.E1,&retregsx,false);    // eval left leaf
            if (e2.EV.E1.Eoper == OPvar ||
                (e2.EV.E1.Eoper == OPind && !e2.EV.E1.Ecount)
               )
            {
                loadea(cdb,e2.EV.E1,&cs,0xF7,opx,0,mAX,mAX | mDX);
            }
            else
            {
                regm_t rretregsx = ALLREGS & ~mAX;
                scodelem(cdb,e2.EV.E1,&rretregsx,retregsx,true); // get rvalue
                getregs(cdb,mAX | mDX);
                const rregx = findreg(rretregsx);
                cdb.gen2(0xF7,grex | modregrmx(3,opx,rregx)); // OP AX,rregx
            }
            freenode(e.EV.E1);
            freenode(e2);
            fixresult(cdb,e,mAX | mDX,pretregs);
            return;
        }

        case OPconst:
            const e2factor = cast(targ_size_t)el_tolong(e2);

            // Multiply by a constant
            if (I32 && sz == REGSIZE * 2)
            {
                /*  if (msw)
                      IMUL    EDX,EDX,lsw
                      IMUL    reg,EAX,msw
                      ADD     reg,EDX
                    else
                      IMUL    reg,EDX,lsw
                    MOV       EDX,lsw
                    MUL       EDX
                    ADD       EDX,reg
                 */
                regm_t retregs = mAX | mDX;
                codelem(cdb,e1,&retregs,false);    // eval left leaf
                reg_t reg = allocScratchReg(cdb, allregs & ~(mAX | mDX));
                getregs(cdb,mDX | mAX);

                const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));
                const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));

                if (msw)
                {
                    genmulimm(cdb,DX,DX,lsw);           // IMUL EDX,EDX,lsw
                    genmulimm(cdb,reg,AX,msw);          // IMUL reg,EAX,msw
                    cdb.gen2(0x03,modregrm(3,reg,DX));  // ADD  reg,EDX
                }
                else
                    genmulimm(cdb,reg,DX,lsw);          // IMUL reg,EDX,lsw

                movregconst(cdb,DX,lsw,0);              // MOV EDX,lsw
                getregs(cdb,mDX);
                cdb.gen2(0xF7,modregrm(3,4,DX));        // MUL EDX
                cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD EDX,reg

                const resregx = mDX | mAX;
                freenode(e2);
                fixresult(cdb,e,resregx,pretregs);
                return;
            }


            const int pow2 = ispow2(e2factor);

            if (sz > REGSIZE || !el_signx32(e2))
                goto default;

            if (config.target_cpu >= TARGET_80286)
            {
                if (I32 || I64)
                {
                    // See if we can use an LEA instruction
                    int ss;
                    int ss2 = 0;
                    int shift;

                    switch (e2factor)
                    {
                        case 12:    ss = 1; ss2 = 2; goto L4;
                        case 24:    ss = 1; ss2 = 3; goto L4;

                        case 6:
                        case 3:     ss = 1; goto L4;

                        case 20:    ss = 2; ss2 = 2; goto L4;
                        case 40:    ss = 2; ss2 = 3; goto L4;

                        case 10:
                        case 5:     ss = 2; goto L4;

                        case 36:    ss = 3; ss2 = 2; goto L4;
                        case 72:    ss = 3; ss2 = 3; goto L4;

                        case 18:
                        case 9:     ss = 3; goto L4;

                        L4:
                        {
                            regm_t resreg = *pretregs & ALLREGS & ~(mBP | mR13);
                            if (!resreg)
                                resreg = isbyte ? BYTEREGS : ALLREGS & ~(mBP | mR13);

                            codelem(cdb,e.EV.E1,&resreg,false);
                            getregs(cdb,resreg);
                            reg_t reg = findreg(resreg);

                            cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                        modregxrmx(ss,reg,reg));        // LEA reg,[ss*reg][reg]
                            assert((reg & 7) != BP);
                            if (ss2)
                            {
                                cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                               modregxrm(ss2,reg,5));
                                cdb.last().IFL1 = FLconst;
                                cdb.last().IEV1.Vint = 0;               // LEA reg,0[ss2*reg]
                            }
                            else if (!(e2factor & 1))                   // if even factor
                            {
                                genregs(cdb,0x03,reg,reg);              // ADD reg,reg
                                code_orrex(cdb.last(),rex);
                            }
                            freenode(e2);
                            fixresult(cdb,e,resreg,pretregs);
                            return;
                        }
                        case 37:
                        case 74:    shift = 2;
                                    goto L5;
                        case 13:
                        case 26:    shift = 0;
                                    goto L5;
                        L5:
                        {
                            regm_t retregs = isbyte ? BYTEREGS : ALLREGS;
                            regm_t resreg = *pretregs & (ALLREGS | mBP);
                            if (!resreg)
                                resreg = retregs;

                            // Don't use EBP
                            resreg &= ~(mBP | mR13);
                            if (!resreg)
                                resreg = retregs;
                            reg_t reg;
                            allocreg(cdb,&resreg,&reg,TYint);

                            regm_t sregm = (ALLREGS & ~mR13) & ~resreg;
                            codelem(cdb,e.EV.E1,&sregm,false);
                            uint sreg = findreg(sregm);
                            getregs(cdb,resreg | sregm);
                            assert((sreg & 7) != BP);
                            assert((reg & 7) != BP);
                            cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                                  modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
                            if (shift)
                                cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
                            cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                                  modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
                            if (!(e2factor & 1))                                  // if even factor
                            {
                                genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
                                code_orrex(cdb.last(),rex);
                            }
                            freenode(e2);
                            fixresult(cdb,e,resreg,pretregs);
                            return;
                        }

                        default:
                            break;
                    }
                }

                regm_t retregs = isbyte ? BYTEREGS : ALLREGS;
                regm_t resreg = *pretregs & (ALLREGS | mBP);
                if (!resreg)
                    resreg = retregs;

                scodelem(cdb,e.EV.E1,&retregs,0,true);     // eval left leaf
                const regx = findreg(retregs);
                reg_t rreg;
                allocreg(cdb,&resreg,&rreg,e.Ety);

                // IMUL rreg,regx,imm
                cdb.genc2(0x69,grex | modregxrmx(3,rreg,regx),e2factor);
                freenode(e2);
                fixresult(cdb,e,resreg,pretregs);
                return;
            }
            goto default;

        case OPind:
            if (!e2.Ecount)                        // if not CSE
                    goto case OPvar;                        // try OP reg,EA
            goto default;

        default:                                    // OPconst and operators
            //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), regm_str(rretregs), regm_str(resreg));
            if (sz <= REGSIZE)
            {
                regm_t retregs = mAX;
                codelem(cdb,e1,&retregs,false);           // eval left leaf
                regm_t rretregs = isbyte ? BYTEREGS & ~mAX
                                         : ALLREGS & ~(mAX|mDX);
                scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
                getregs(cdb,mAX | mDX);     // trash these regs
                reg_t rreg = findreg(rretregs);
                cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,5 - uns,rreg)); // OP AX,rreg
                if (I64 && isbyte && rreg >= 4)
                    code_orrex(cdb.last(), REX);
                fixresult(cdb,e,mAX,pretregs);
                return;
            }
            else if (sz == 2 * REGSIZE)
            {
                regm_t retregs = mDX | mAX;
                codelem(cdb,e1,&retregs,false);           // eval left leaf
                if (config.target_cpu >= TARGET_PentiumPro)
                {
                    regm_t rretregs = allregs & ~retregs;           // second arg
                    scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue
                    regm_t rlo = findreglsw(rretregs);
                    regm_t rhi = findregmsw(rretregs);
                    /*  IMUL    rhi,EAX
                        IMUL    EDX,rlo
                        ADD     rhi,EDX
                        MUL     rlo
                        ADD     EDX,rhi
                     */
                    getregs(cdb,mAX|mDX|mask(rhi));
                    cdb.gen2(0x0FAF,modregrm(3,rhi,AX));
                    cdb.gen2(0x0FAF,modregrm(3,DX,rlo));
                    cdb.gen2(0x03,modregrm(3,rhi,DX));
                    cdb.gen2(0xF7,modregrm(3,4,rlo));
                    cdb.gen2(0x03,modregrm(3,DX,rhi));
                    fixresult(cdb,e,mDX|mAX,pretregs);
                    return;
                }
                else
                {
                    // Operands in DX:AX and CX:BX, let the library routine multiply
                    regm_t rretregs = mCX | mBX;           // second arg
                    scodelem(cdb,e2,&rretregs,retregs,true);  // get rvalue
                    callclib(cdb,e,CLIB.lmul,pretregs,0);
                    return;
                }
            }
            assert(0);

        case OPvar:
            if (!I16 && sz <= REGSIZE)
            {
                if (sz > 1)        // no byte version
                {
                    // Generate IMUL r32,r/m32
                    regm_t retregs = *pretregs & (ALLREGS | mBP);
                    if (!retregs)
                        retregs = ALLREGS;
                    codelem(cdb,e1,&retregs,false);        // eval left leaf
                    regm_t resreg = retregs;
                    loadea(cdb,e2,&cs,0x0FAF,findreg(resreg),0,retregs,retregs);
                    freenode(e2);
                    fixresult(cdb,e,resreg,pretregs);
                    return;
                }
            }
            else
            {
                if (sz == 2 * REGSIZE)
                {
                    if (e.EV.E1.Eoper != opunslng ||
                        e1.Ecount)
                        goto default;            // have to handle it with codelem()

                    regm_t retregs = ALLREGS & ~(mAX | mDX);
                    codelem(cdb,e1.EV.E1,&retregs,false);    // eval left leaf
                    const reg = findreg(retregs);
                    getregs(cdb,mAX);
                    genmovreg(cdb,AX,reg);            // MOV AX,reg
                    loadea(cdb,e2,&cs,0xF7,4,REGSIZE,mAX | mDX | mskl(reg),mAX | mDX);  // MUL EA+2
                    getregs(cdb,retregs);
                    cdb.gen1(0x90 + reg);                          // XCHG AX,reg
                    getregs(cdb,mAX | mDX);
                    if ((cs.Irm & 0xC0) == 0xC0)            // if EA is a register
                        loadea(cdb,e2,&cs,0xF7,4,0,mAX | mskl(reg),mAX | mDX); // MUL EA
                    else
                    {   getlvalue_lsw(&cs);
                        cdb.gen(&cs);                       // MUL EA
                    }
                    cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD DX,reg

                    freenode(e1);
                    fixresult(cdb,e,mAX | mDX,pretregs);
                    return;
                }
                assert(sz <= REGSIZE);
            }

            // loadea() handles CWD or CLR DX for divides
            regm_t retregs = sz <= REGSIZE ? mAX : mDX|mAX;
            codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf
            loadea(cdb,e2,&cs,0xF7 ^ isbyte,5 - uns,0,
                   mAX,
                   mAX | mDX);
            freenode(e2);
            fixresult(cdb,e,mAX,pretregs);
            return;
    }
    assert(0);
}


/*****************************
 * Handle divide, modulo and remquo.
 * Note that modulo isn't defined for doubles.
1288 */ 1289 1290 void cddiv(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 1291 { 1292 //printf("cddiv()\n"); 1293 elem *e1 = e.EV.E1; 1294 elem *e2 = e.EV.E2; 1295 if (*pretregs == 0) // if don't want result 1296 { 1297 codelem(cdb,e1,pretregs,false); // eval left leaf 1298 *pretregs = 0; // in case they got set 1299 codelem(cdb,e2,pretregs,false); 1300 return; 1301 } 1302 1303 //printf("cddiv(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 1304 const tyml = tybasic(e1.Ety); 1305 const ty = tybasic(e.Ety); 1306 const oper = e.Eoper; 1307 1308 if (tyfloating(tyml)) 1309 { 1310 if (tyvector(tyml) || 1311 config.fpxmmregs && oper != OPmod && tyxmmreg(tyml) && 1312 !(*pretregs & mST0) && 1313 !(ty == TYldouble || ty == TYildouble) && // watch out for shrinkLongDoubleConstantIfPossible() 1314 !tycomplex(ty) && // SIMD code is not set up to deal with complex mul/div 1315 !(ty == TYllong) // or passing to function through integer register 1316 ) 1317 { 1318 orthxmm(cdb,e,pretregs); 1319 return; 1320 } 1321 if (config.exe & EX_posix) 1322 orth87(cdb,e,pretregs); 1323 else 1324 opdouble(cdb,e,pretregs,(oper == OPmul) ? CLIB.dmul : CLIB.ddiv); 1325 1326 return; 1327 } 1328 1329 if (tyxmmreg(tyml)) 1330 { 1331 orthxmm(cdb,e,pretregs); 1332 return; 1333 } 1334 1335 const uns = tyuns(tyml) || tyuns(e2.Ety); // 1 if uint operation, 0 if not 1336 const isbyte = tybyte(e.Ety) != 0; 1337 const sz = _tysize[tyml]; 1338 const ubyte rex = (I64 && sz == 8) ? 
REX_W : 0; 1339 const uint grex = rex << 16; 1340 1341 code cs = void; 1342 cs.Iflags = 0; 1343 cs.Irex = 0; 1344 1345 switch (e2.Eoper) 1346 { 1347 case OPconst: 1348 auto d = cast(targ_size_t)el_tolong(e2); 1349 bool neg = false; 1350 const e2factor = d; 1351 if (!uns && cast(targ_llong)e2factor < 0) 1352 { neg = true; 1353 d = -d; 1354 } 1355 1356 // Signed divide by a constant 1357 if ((d & (d - 1)) && 1358 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) && 1359 config.flags4 & CFG4speed && !uns) 1360 { 1361 /* R1 / 10 1362 * 1363 * MOV EAX,m 1364 * IMUL R1 1365 * MOV EAX,R1 1366 * SAR EAX,31 1367 * SAR EDX,shpost 1368 * SUB EDX,EAX 1369 * IMUL EAX,EDX,d 1370 * SUB R1,EAX 1371 * 1372 * EDX = quotient 1373 * R1 = remainder 1374 */ 1375 assert(sz == 4 || sz == 8); 1376 1377 ulong m; 1378 int shpost; 1379 const int N = sz * 8; 1380 const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost); 1381 1382 regm_t regm = allregs & ~(mAX | mDX); 1383 codelem(cdb,e1,®m,false); // eval left leaf 1384 const reg_t reg = findreg(regm); 1385 getregs(cdb,regm | mDX | mAX); 1386 1387 /* Algorithm 5.2 1388 * if m>=2**(N-1) 1389 * q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n) 1390 * else 1391 * q = SRA(MULSH(m,n), shpost) - XSIGN(n) 1392 * if (neg) 1393 * q = -q 1394 */ 1395 const bool mgt = mhighbit || m >= (1UL << (N - 1)); 1396 movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 
0x40 : 0); // MOV EAX,m 1397 cdb.gen2(0xF7,grex | modregrmx(3,5,reg)); // IMUL R1 1398 if (mgt) 1399 cdb.gen2(0x03,grex | modregrmx(3,DX,reg)); // ADD EDX,R1 1400 getregsNoSave(mAX); // EAX no longer contains 'm' 1401 genmovreg(cdb, AX, reg); // MOV EAX,R1 1402 cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1); // SAR EAX,31 1403 if (shpost) 1404 cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost); // SAR EDX,shpost 1405 reg_t r3; 1406 if (neg && oper == OPdiv) 1407 { 1408 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB EAX,EDX 1409 r3 = AX; 1410 } 1411 else 1412 { 1413 cdb.gen2(0x2B,grex | modregrm(3,DX,AX)); // SUB EDX,EAX 1414 r3 = DX; 1415 } 1416 1417 // r3 is quotient 1418 regm_t resregx; 1419 switch (oper) 1420 { case OPdiv: 1421 resregx = mask(r3); 1422 break; 1423 1424 case OPmod: 1425 assert(reg != AX && r3 == DX); 1426 if (sz == 4 || (sz == 8 && cast(targ_long)d == d)) 1427 { 1428 cdb.genc2(0x69,grex | modregrm(3,AX,DX),d); // IMUL EAX,EDX,d 1429 } 1430 else 1431 { 1432 movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0); // MOV EAX,d 1433 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX)); // IMUL EAX,EDX 1434 getregsNoSave(mAX); // EAX no longer contains 'd' 1435 } 1436 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB R1,EAX 1437 resregx = regm; 1438 break; 1439 1440 case OPremquo: 1441 assert(reg != AX && r3 == DX); 1442 if (sz == 4 || (sz == 8 && cast(targ_long)d == d)) 1443 { 1444 cdb.genc2(0x69,grex | modregrm(3,AX,DX),d); // IMUL EAX,EDX,d 1445 } 1446 else 1447 { 1448 movregconst(cdb,AX,d,(sz == 8) ? 
0x40 : 0); // MOV EAX,d 1449 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX)); // IMUL EAX,EDX 1450 } 1451 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB R1,EAX 1452 genmovreg(cdb, AX, r3); // MOV EAX,r3 1453 if (neg) 1454 cdb.gen2(0xF7,grex | modregrm(3,3,AX)); // NEG EAX 1455 genmovreg(cdb, DX, reg); // MOV EDX,R1 1456 resregx = mDX | mAX; 1457 break; 1458 1459 default: 1460 assert(0); 1461 } 1462 freenode(e2); 1463 fixresult(cdb,e,resregx,pretregs); 1464 return; 1465 } 1466 1467 // Unsigned divide by a constant 1468 if (e2factor > 2 && (e2factor & (e2factor - 1)) && 1469 ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))) && 1470 config.flags4 & CFG4speed && uns) 1471 { 1472 assert(sz == 4 || sz == 8); 1473 1474 reg_t r3; 1475 regm_t regm; 1476 reg_t reg; 1477 ulong m; 1478 int shpre; 1479 int shpost; 1480 if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost)) 1481 { 1482 /* t1 = MULUH(m, n) 1483 * q = SRL(t1 + SRL(n - t1, 1), shpost - 1) 1484 * MOV EAX,reg 1485 * MOV EDX,m 1486 * MUL EDX 1487 * MOV EAX,reg 1488 * SUB EAX,EDX 1489 * SHR EAX,1 1490 * LEA R3,[EAX][EDX] 1491 * SHR R3,shpost-1 1492 */ 1493 assert(shpre == 0); 1494 1495 regm = allregs & ~(mAX | mDX); 1496 codelem(cdb,e1,®m,false); // eval left leaf 1497 reg = findreg(regm); 1498 getregs(cdb,mAX | mDX); 1499 genmovreg(cdb,AX,reg); // MOV EAX,reg 1500 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 
0x40 : 0); // MOV EDX,m 1501 getregs(cdb,regm | mDX | mAX); 1502 cdb.gen2(0xF7,grex | modregrmx(3,4,DX)); // MUL EDX 1503 genmovreg(cdb,AX,reg); // MOV EAX,reg 1504 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB EAX,EDX 1505 cdb.genc2(0xC1,grex | modregrm(3,5,AX),1); // SHR EAX,1 1506 regm_t regm3 = allregs; 1507 if (oper == OPmod || oper == OPremquo) 1508 { 1509 regm3 &= ~regm; 1510 if (oper == OPremquo || !el_signx32(e2)) 1511 regm3 &= ~mAX; 1512 } 1513 allocreg(cdb,®m3,&r3,TYint); 1514 cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX] 1515 if (shpost != 1) 1516 cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1); // SHR R3,shpost-1 1517 } 1518 else 1519 { 1520 /* q = SRL(MULUH(m, SRL(n, shpre)), shpost) 1521 * SHR EAX,shpre 1522 * MOV reg,m 1523 * MUL reg 1524 * SHR EDX,shpost 1525 */ 1526 regm = mAX; 1527 if (oper == OPmod || oper == OPremquo) 1528 regm = allregs & ~(mAX|mDX); 1529 codelem(cdb,e1,®m,false); // eval left leaf 1530 reg = findreg(regm); 1531 1532 if (reg != AX) 1533 { 1534 getregs(cdb,mAX); 1535 genmovreg(cdb,AX,reg); // MOV EAX,reg 1536 } 1537 if (shpre) 1538 { 1539 getregs(cdb,mAX); 1540 cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre); // SHR EAX,shpre 1541 } 1542 getregs(cdb,mDX); 1543 movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0); // MOV EDX,m 1544 getregs(cdb,mDX | mAX); 1545 cdb.gen2(0xF7,grex | modregrmx(3,4,DX)); // MUL EDX 1546 if (shpost) 1547 cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost); // SHR EDX,shpost 1548 r3 = DX; 1549 } 1550 1551 regm_t resreg; 1552 switch (oper) 1553 { case OPdiv: 1554 // r3 = quotient 1555 resreg = mask(r3); 1556 break; 1557 1558 case OPmod: 1559 /* reg = original value 1560 * r3 = quotient 1561 */ 1562 assert(!(regm & mAX)); 1563 if (el_signx32(e2)) 1564 { 1565 cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor 1566 } 1567 else 1568 { 1569 assert(!(mask(r3) & mAX)); 1570 movregconst(cdb,AX,e2factor,(sz == 8) ? 
0x40 : 0); // MOV EAX,e2factor 1571 getregs(cdb,mAX); 1572 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3)); // IMUL EAX,r3 1573 } 1574 getregs(cdb,regm); 1575 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB reg,EAX 1576 resreg = regm; 1577 break; 1578 1579 case OPremquo: 1580 /* reg = original value 1581 * r3 = quotient 1582 */ 1583 assert(!(mask(r3) & (mAX|regm))); 1584 assert(!(regm & mAX)); 1585 if (el_signx32(e2)) 1586 { 1587 cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor 1588 } 1589 else 1590 { 1591 movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0); // MOV EAX,e2factor 1592 getregs(cdb,mAX); 1593 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3)); // IMUL EAX,r3 1594 } 1595 getregs(cdb,regm); 1596 cdb.gen2(0x2B,grex | modregxrm(3,reg,AX)); // SUB reg,EAX 1597 genmovreg(cdb, AX, r3); // MOV EAX,r3 1598 genmovreg(cdb, DX, reg); // MOV EDX,reg 1599 resreg = mDX | mAX; 1600 break; 1601 1602 default: 1603 assert(0); 1604 } 1605 freenode(e2); 1606 fixresult(cdb,e,resreg,pretregs); 1607 return; 1608 } 1609 1610 const int pow2 = ispow2(e2factor); 1611 1612 // Register pair signed divide by power of 2 1613 if (sz == REGSIZE * 2 && 1614 (oper == OPdiv) && !uns && 1615 pow2 != -1 && 1616 I32 // not set up for I64 cent yet 1617 ) 1618 { 1619 regm_t retregs = mDX | mAX; 1620 if (pow2 == 63 && !(retregs & BYTEREGS & mLSW)) 1621 retregs = (retregs & mMSW) | (BYTEREGS & mLSW); // because of SETZ 1622 1623 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1624 const rhi = findregmsw(retregs); 1625 const rlo = findreglsw(retregs); 1626 freenode(e2); 1627 getregs(cdb,retregs); 1628 1629 if (pow2 < 32) 1630 { 1631 reg_t r1 = allocScratchReg(cdb, allregs & ~retregs); 1632 1633 genmovreg(cdb,r1,rhi); // MOV r1,rhi 1634 if (pow2 == 1) 1635 cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR r1,31 1636 else 1637 { 1638 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1639 cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 
1); // AND r1,mask 1640 } 1641 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1)); // ADD rlo,r1 1642 cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0); // ADC rhi,0 1643 cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2); // SHRD rlo,rhi,pow2 1644 cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2); // SAR rhi,pow2 1645 } 1646 else if (pow2 == 32) 1647 { 1648 reg_t r1 = allocScratchReg(cdb, allregs & ~retregs); 1649 1650 genmovreg(cdb,r1,rhi); // MOV r1,rhi 1651 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1652 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1)); // ADD rlo,r1 1653 cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0); // ADC rhi,0 1654 cdb.genmovreg(rlo,rhi); // MOV rlo,rhi 1655 cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1); // SAR rhi,31 1656 } 1657 else if (pow2 < 63) 1658 { 1659 reg_t r1 = allocScratchReg(cdb, allregs & ~retregs); 1660 reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1))); 1661 1662 genmovreg(cdb,r1,rhi); // MOV r1,rhi 1663 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1664 cdb.genmovreg(r2,r1); // MOV r2,r1 1665 1666 if (pow2 == 33) 1667 { 1668 cdb.gen2(0xF7,modregrmx(3,3,r1)); // NEG r1 1669 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2)); // ADD rlo,r2 1670 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1)); // ADC rhi,r1 1671 } 1672 else 1673 { 1674 cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask 1675 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1)); // ADD rlo,r1 1676 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2)); // ADC rhi,r2 1677 } 1678 1679 cdb.genmovreg(rlo,rhi); // MOV rlo,rhi 1680 cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32); // SAR rlo,pow2-32 1681 cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1); // SAR rhi,31 1682 } 1683 else 1684 { 1685 // This may be better done by cgelem.d 1686 assert(pow2 == 63); 1687 cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_000 1688 cdb.genregs(0x09,rlo,rhi); // OR rlo,rhi 1689 
cdb.gen2(0x0F94,modregrmx(3,0,rlo)); // SETZ rlo 1690 cdb.genregs(MOVZXb,rlo,rlo); // MOVZX rlo,rloL 1691 movregconst(cdb,rhi,0,0); // MOV rhi,0 1692 } 1693 1694 fixresult(cdb,e,retregs,pretregs); 1695 return; 1696 } 1697 1698 // Register pair signed modulo by power of 2 1699 if (sz == REGSIZE * 2 && 1700 (oper == OPmod) && !uns && 1701 pow2 != -1 && 1702 I32 // not set up for I64 cent yet 1703 ) 1704 { 1705 regm_t retregs = mDX | mAX; 1706 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1707 const rhi = findregmsw(retregs); 1708 const rlo = findreglsw(retregs); 1709 freenode(e2); 1710 getregs(cdb,retregs); 1711 1712 regm_t scratchm = allregs & ~retregs; 1713 if (pow2 == 63) 1714 scratchm &= BYTEREGS; // because of SETZ 1715 reg_t r1 = allocScratchReg(cdb, scratchm); 1716 1717 if (pow2 < 32) 1718 { 1719 cdb.genmovreg(r1,rhi); // MOV r1,rhi 1720 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1721 cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1)); // XOR rlo,r1 1722 cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1)); // SUB rlo,r1 1723 cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1); // AND rlo,(1<<pow2)-1 1724 cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1)); // XOR rlo,r1 1725 cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1)); // SUB rlo,r1 1726 cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi)); // SBB rhi,rhi 1727 } 1728 else if (pow2 == 32) 1729 { 1730 cdb.genmovreg(r1,rhi); // MOV r1,rhi 1731 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1732 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1)); // ADD rlo,r1 1733 cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1)); // SUB rlo,r1 1734 cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi)); // SBB rhi,rhi 1735 } 1736 else if (pow2 < 63) 1737 { 1738 reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | mask(r1))); 1739 1740 cdb.genmovreg(r1,rhi); // MOV r1,rhi 1741 cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31 1742 cdb.genmovreg(r2,r1); // MOV r2,r1 1743 cdb.genc2(0x0FAC,grex | 
modregrm(3,r2,r1),64-pow2); // SHRD r1,r2,64-pow2 1744 cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2); // SHR r2,64-pow2 1745 cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1)); // ADD rlo,r1 1746 cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2)); // ADC rhi,r2 1747 cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND rhi,(1<<(pow2-32))-1 1748 cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1)); // SUB rlo,r1 1749 cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2)); // SBB rhi,r2 1750 } 1751 else 1752 { 1753 // This may be better done by cgelem.d 1754 assert(pow2 == 63); 1755 1756 cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi] 1757 cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo)); // OR r1,rlo 1758 cdb.gen2(0x0F94,modregrmx(3,0,r1)); // SETZ r1 1759 cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL r1,31 1760 cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1)); // SUB rhi,r1 1761 } 1762 1763 fixresult(cdb,e,retregs,pretregs); 1764 return; 1765 } 1766 1767 if (sz > REGSIZE || !el_signx32(e2)) 1768 goto default; 1769 1770 // Special code for signed divide or modulo by power of 2 1771 if ((sz == REGSIZE || (I64 && sz == 4)) && 1772 (oper == OPdiv || oper == OPmod) && !uns && 1773 pow2 != -1 && 1774 !(config.target_cpu < TARGET_80286 && pow2 != 1 && oper == OPdiv) 1775 ) 1776 { 1777 if (pow2 == 1 && oper == OPdiv && config.target_cpu > TARGET_80386) 1778 { 1779 /* MOV r,reg 1780 SHR r,31 1781 ADD reg,r 1782 SAR reg,1 1783 */ 1784 regm_t retregs = allregs; 1785 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1786 const reg = findreg(retregs); 1787 freenode(e2); 1788 getregs(cdb,retregs); 1789 1790 reg_t r = allocScratchReg(cdb, allregs & ~retregs); 1791 genmovreg(cdb,r,reg); // MOV r,reg 1792 cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31 1793 cdb.gen2(0x03,grex | modregxrmx(3,reg,r)); // ADD reg,r 1794 cdb.gen2(0xD1,grex | modregrmx(3,7,reg)); // SAR reg,1 1795 regm_t resreg = retregs; 1796 
fixresult(cdb,e,resreg,pretregs); 1797 return; 1798 } 1799 1800 regm_t resreg; 1801 switch (oper) 1802 { 1803 case OPdiv: 1804 resreg = mAX; 1805 break; 1806 1807 case OPmod: 1808 resreg = mDX; 1809 break; 1810 1811 case OPremquo: 1812 resreg = mDX | mAX; 1813 break; 1814 1815 default: 1816 assert(0); 1817 } 1818 1819 regm_t retregs = mAX; 1820 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1821 freenode(e2); 1822 getregs(cdb,mAX | mDX); // modify these regs 1823 cdb.gen1(0x99); // CWD 1824 code_orrex(cdb.last(), rex); 1825 if (pow2 == 1) 1826 { 1827 if (oper == OPdiv) 1828 { 1829 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1830 cdb.gen2(0xD1,grex | modregrm(3,7,AX)); // SAR AX,1 1831 } 1832 else // OPmod 1833 { 1834 cdb.gen2(0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1835 cdb.genc2(0x81,grex | modregrm(3,4,AX),1); // AND AX,1 1836 cdb.gen2(0x03,grex | modregrm(3,DX,AX)); // ADD DX,AX 1837 } 1838 } 1839 else 1840 { targ_ulong m; 1841 1842 m = (1 << pow2) - 1; 1843 if (oper == OPdiv) 1844 { 1845 cdb.genc2(0x81,grex | modregrm(3,4,DX),m); // AND DX,m 1846 cdb.gen2(0x03,grex | modregrm(3,AX,DX)); // ADD AX,DX 1847 // Be careful not to generate this for 8088 1848 assert(config.target_cpu >= TARGET_80286); 1849 cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2 1850 } 1851 else // OPmod 1852 { 1853 cdb.gen2(0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1854 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1855 cdb.genc2(0x81,grex | modregrm(3,4,AX),m); // AND AX,mask 1856 cdb.gen2(0x33,grex | modregrm(3,AX,DX)); // XOR AX,DX 1857 cdb.gen2(0x2B,grex | modregrm(3,AX,DX)); // SUB AX,DX 1858 resreg = mAX; 1859 } 1860 } 1861 fixresult(cdb,e,resreg,pretregs); 1862 return; 1863 } 1864 goto default; 1865 1866 case OPind: 1867 if (!e2.Ecount) // if not CSE 1868 goto case OPvar; // try OP reg,EA 1869 goto default; 1870 1871 default: // OPconst and operators 1872 //printf("test2 %p, retregs = %s rretregs = %s resreg = %s\n", e, regm_str(retregs), 
regm_str(rretregs), regm_str(resreg)); 1873 regm_t retregs = sz <= REGSIZE ? mAX : mDX | mAX; 1874 codelem(cdb,e1,&retregs,false); // eval left leaf 1875 regm_t rretregs; 1876 if (sz <= REGSIZE) // dedicated regs for div 1877 { 1878 // pick some other regs 1879 rretregs = isbyte ? BYTEREGS & ~mAX 1880 : ALLREGS & ~(mAX|mDX); 1881 } 1882 else 1883 { 1884 assert(sz <= 2 * REGSIZE); 1885 rretregs = mCX | mBX; // second arg 1886 } 1887 scodelem(cdb,e2,&rretregs,retregs,true); // get rvalue 1888 if (sz <= REGSIZE) 1889 { 1890 getregs(cdb,mAX | mDX); // trash these regs 1891 if (uns) // unsigned divide 1892 { 1893 movregconst(cdb,DX,0,(sz == 8) ? 64 : 0); // MOV DX,0 1894 getregs(cdb,mDX); 1895 } 1896 else 1897 { 1898 cdb.gen1(0x99); // CWD 1899 code_orrex(cdb.last(),rex); 1900 } 1901 reg_t rreg = findreg(rretregs); 1902 cdb.gen2(0xF7 ^ isbyte,grex | modregrmx(3,7 - uns,rreg)); // OP AX,rreg 1903 if (I64 && isbyte && rreg >= 4) 1904 code_orrex(cdb.last(), REX); 1905 regm_t resreg; 1906 switch (oper) 1907 { 1908 case OPdiv: 1909 resreg = mAX; 1910 break; 1911 1912 case OPmod: 1913 resreg = mDX; 1914 break; 1915 1916 case OPremquo: 1917 resreg = mDX | mAX; 1918 break; 1919 1920 default: 1921 assert(0); 1922 } 1923 fixresult(cdb,e,resreg,pretregs); 1924 } 1925 else if (sz == 2 * REGSIZE) 1926 { 1927 uint lib; 1928 switch (oper) 1929 { 1930 case OPdiv: 1931 case OPremquo: 1932 lib = uns ? CLIB.uldiv : CLIB.ldiv; 1933 break; 1934 1935 case OPmod: 1936 lib = uns ? CLIB.ulmod : CLIB.lmod; 1937 break; 1938 1939 default: 1940 assert(0); 1941 } 1942 1943 regm_t keepregs = I32 ? 
mSI | mDI : 0; 1944 callclib(cdb,e,lib,pretregs,keepregs); 1945 } 1946 else 1947 assert(0); 1948 return; 1949 1950 case OPvar: 1951 if (I16 || sz == 2 * REGSIZE) 1952 goto default; // have to handle it with codelem() 1953 1954 // loadea() handles CWD or CLR DX for divides 1955 regm_t retregs = mAX; 1956 codelem(cdb,e.EV.E1,&retregs,false); // eval left leaf 1957 loadea(cdb,e2,&cs,0xF7 ^ isbyte,7 - uns,0, 1958 mAX | mDX, 1959 mAX | mDX); 1960 freenode(e2); 1961 regm_t resreg; 1962 switch (oper) 1963 { 1964 case OPdiv: 1965 resreg = mAX; 1966 break; 1967 1968 case OPmod: 1969 resreg = mDX; 1970 break; 1971 1972 case OPremquo: 1973 resreg = mDX | mAX; 1974 break; 1975 1976 default: 1977 assert(0); 1978 } 1979 fixresult(cdb,e,resreg,pretregs); 1980 return; 1981 } 1982 assert(0); 1983 } 1984 1985 1986 /*************************** 1987 * Handle OPnot and OPbool. 1988 * Generate: 1989 * c: [evaluate e1] 1990 * cfalse: [save reg code] 1991 * clr reg 1992 * jmp cnop 1993 * ctrue: [save reg code] 1994 * clr reg 1995 * inc reg 1996 * cnop: nop 1997 */ 1998 1999 void cdnot(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2000 { 2001 //printf("cdnot()\n"); 2002 reg_t reg; 2003 tym_t forflags; 2004 regm_t retregs; 2005 elem *e1 = e.EV.E1; 2006 2007 if (*pretregs == 0) 2008 goto L1; 2009 if (*pretregs == mPSW) 2010 { //assert(e.Eoper != OPnot && e.Eoper != OPbool);*/ /* should've been optimized 2011 L1: 2012 codelem(cdb,e1,pretregs,false); // evaluate e1 for cc 2013 return; 2014 } 2015 2016 OPER op = e.Eoper; 2017 uint sz = tysize(e1.Ety); 2018 uint rex = (I64 && sz == 8) ? REX_W : 0; 2019 uint grex = rex << 16; 2020 2021 if (!tyfloating(e1.Ety)) 2022 { 2023 if (sz <= REGSIZE && e1.Eoper == OPvar) 2024 { code cs; 2025 2026 getlvalue(cdb,&cs,e1,0); 2027 freenode(e1); 2028 if (!I16 && sz == 2) 2029 cs.Iflags |= CFopsize; 2030 2031 retregs = *pretregs & (ALLREGS | mBP); 2032 if (config.target_cpu >= TARGET_80486 && 2033 tysize(e.Ety) == 1) 2034 { 2035 if (reghasvalue((sz == 1) ? 
BYTEREGS : ALLREGS,0,®)) 2036 { 2037 cs.Iop = 0x39; 2038 if (I64 && (sz == 1) && reg >= 4) 2039 cs.Irex |= REX; 2040 } 2041 else 2042 { cs.Iop = 0x81; 2043 reg = 7; 2044 cs.IFL2 = FLconst; 2045 cs.IEV2.Vint = 0; 2046 } 2047 cs.Iop ^= (sz == 1); 2048 code_newreg(&cs,reg); 2049 cdb.gen(&cs); // CMP e1,0 2050 2051 retregs &= BYTEREGS; 2052 if (!retregs) 2053 retregs = BYTEREGS; 2054 allocreg(cdb,&retregs,®,TYint); 2055 2056 const opcode_t iop = (op == OPbool) 2057 ? 0x0F95 // SETNZ rm8 2058 : 0x0F94; // SETZ rm8 2059 cdb.gen2(iop, modregrmx(3,0,reg)); 2060 if (reg >= 4) 2061 code_orrex(cdb.last(), REX); 2062 if (op == OPbool) 2063 *pretregs &= ~mPSW; 2064 goto L4; 2065 } 2066 2067 if (reghasvalue((sz == 1) ? BYTEREGS : ALLREGS,1,®)) 2068 cs.Iop = 0x39; 2069 else 2070 { cs.Iop = 0x81; 2071 reg = 7; 2072 cs.IFL2 = FLconst; 2073 cs.IEV2.Vint = 1; 2074 } 2075 if (I64 && (sz == 1) && reg >= 4) 2076 cs.Irex |= REX; 2077 cs.Iop ^= (sz == 1); 2078 code_newreg(&cs,reg); 2079 cdb.gen(&cs); // CMP e1,1 2080 2081 allocreg(cdb,&retregs,®,TYint); 2082 op ^= (OPbool ^ OPnot); // switch operators 2083 goto L2; 2084 } 2085 else if (config.target_cpu >= TARGET_80486 && 2086 tysize(e.Ety) == 1) 2087 { 2088 int jop = jmpopcode(e.EV.E1); 2089 retregs = mPSW; 2090 codelem(cdb,e.EV.E1,&retregs,false); 2091 retregs = *pretregs & BYTEREGS; 2092 if (!retregs) 2093 retregs = BYTEREGS; 2094 allocreg(cdb,&retregs,®,TYint); 2095 2096 int iop = 0x0F90 | (jop & 0x0F); // SETcc rm8 2097 if (op == OPnot) 2098 iop ^= 1; 2099 cdb.gen2(iop,grex | modregrmx(3,0,reg)); 2100 if (reg >= 4) 2101 code_orrex(cdb.last(), REX); 2102 if (op == OPbool) 2103 *pretregs &= ~mPSW; 2104 goto L4; 2105 } 2106 else if (sz <= REGSIZE && 2107 // NEG bytereg is too expensive 2108 (sz != 1 || config.target_cpu < TARGET_PentiumPro)) 2109 { 2110 retregs = *pretregs & (ALLREGS | mBP); 2111 if (sz == 1 && !(retregs &= BYTEREGS)) 2112 retregs = BYTEREGS; 2113 codelem(cdb,e.EV.E1,&retregs,false); 2114 reg = findreg(retregs); 2115 
getregs(cdb,retregs); 2116 cdb.gen2(sz == 1 ? 0xF6 : 0xF7,grex | modregrmx(3,3,reg)); // NEG reg 2117 code_orflag(cdb.last(),CFpsw); 2118 if (!I16 && sz == SHORTSIZE) 2119 code_orflag(cdb.last(),CFopsize); 2120 L2: 2121 genregs(cdb,0x19,reg,reg); // SBB reg,reg 2122 code_orrex(cdb.last(), rex); 2123 // At this point, reg==0 if e1==0, reg==-1 if e1!=0 2124 if (op == OPnot) 2125 { 2126 if (I64) 2127 cdb.gen2(0xFF,grex | modregrmx(3,0,reg)); // INC reg 2128 else 2129 cdb.gen1(0x40 + reg); // INC reg 2130 } 2131 else 2132 cdb.gen2(0xF7,grex | modregrmx(3,3,reg)); // NEG reg 2133 if (*pretregs & mPSW) 2134 { code_orflag(cdb.last(),CFpsw); 2135 *pretregs &= ~mPSW; // flags are always set anyway 2136 } 2137 L4: 2138 fixresult(cdb,e,retregs,pretregs); 2139 return; 2140 } 2141 } 2142 code *cnop = gennop(null); 2143 code *ctrue = gennop(null); 2144 logexp(cdb,e.EV.E1,(op == OPnot) ? false : true,FLcode,ctrue); 2145 forflags = *pretregs & mPSW; 2146 if (I64 && sz == 8) 2147 forflags |= 64; 2148 assert(tysize(e.Ety) <= REGSIZE); // result better be int 2149 CodeBuilder cdbfalse; 2150 cdbfalse.ctor(); 2151 allocreg(cdbfalse,pretregs,®,e.Ety); // allocate reg for result 2152 code *cfalse = cdbfalse.finish(); 2153 CodeBuilder cdbtrue; 2154 cdbtrue.ctor(); 2155 cdbtrue.append(ctrue); 2156 for (code *c1 = cfalse; c1; c1 = code_next(c1)) 2157 cdbtrue.gen(c1); // duplicate reg save code 2158 CodeBuilder cdbfalse2; 2159 cdbfalse2.ctor(); 2160 movregconst(cdbfalse2,reg,0,forflags); // mov 0 into reg 2161 regcon.immed.mval &= ~mask(reg); // mark reg as unavail 2162 movregconst(cdbtrue,reg,1,forflags); // mov 1 into reg 2163 regcon.immed.mval &= ~mask(reg); // mark reg as unavail 2164 genjmp(cdbfalse2,JMP,FLcode,cast(block *) cnop); // skip over ctrue 2165 cdb.append(cfalse); 2166 cdb.append(cdbfalse2); 2167 cdb.append(cdbtrue); 2168 cdb.append(cnop); 2169 } 2170 2171 2172 /************************ 2173 * Complement operator 2174 */ 2175 2176 void cdcom(ref CodeBuilder cdb,elem *e,regm_t 
*pretregs) 2177 { 2178 if (*pretregs == 0) 2179 { 2180 codelem(cdb,e.EV.E1,pretregs,false); 2181 return; 2182 } 2183 tym_t tym = tybasic(e.Ety); 2184 int sz = _tysize[tym]; 2185 uint rex = (I64 && sz == 8) ? REX_W : 0; 2186 regm_t possregs = (sz == 1) ? BYTEREGS : allregs; 2187 regm_t retregs = *pretregs & possregs; 2188 if (retregs == 0) 2189 retregs = possregs; 2190 codelem(cdb,e.EV.E1,&retregs,false); 2191 getregs(cdb,retregs); // retregs will be destroyed 2192 2193 if (0 && sz == 4 * REGSIZE) 2194 { 2195 cdb.gen2(0xF7,modregrm(3,2,AX)); // NOT AX 2196 cdb.gen2(0xF7,modregrm(3,2,BX)); // NOT BX 2197 cdb.gen2(0xF7,modregrm(3,2,CX)); // NOT CX 2198 cdb.gen2(0xF7,modregrm(3,2,DX)); // NOT DX 2199 } 2200 else 2201 { 2202 const reg = (sz <= REGSIZE) ? findreg(retregs) : findregmsw(retregs); 2203 const op = (sz == 1) ? 0xF6 : 0xF7; 2204 genregs(cdb,op,2,reg); // NOT reg 2205 code_orrex(cdb.last(), rex); 2206 if (I64 && sz == 1 && reg >= 4) 2207 code_orrex(cdb.last(), REX); 2208 if (sz == 2 * REGSIZE) 2209 { 2210 const reg2 = findreglsw(retregs); 2211 genregs(cdb,op,2,reg2); // NOT reg+1 2212 } 2213 } 2214 fixresult(cdb,e,retregs,pretregs); 2215 } 2216 2217 /************************ 2218 * Bswap operator 2219 */ 2220 2221 void cdbswap(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2222 { 2223 if (*pretregs == 0) 2224 { 2225 codelem(cdb,e.EV.E1,pretregs,false); 2226 return; 2227 } 2228 2229 const tym = tybasic(e.Ety); 2230 const sz = _tysize[tym]; 2231 const posregs = (sz == 2) ? 
BYTEREGS : allregs; 2232 regm_t retregs = *pretregs & posregs; 2233 if (retregs == 0) 2234 retregs = posregs; 2235 codelem(cdb,e.EV.E1,&retregs,false); 2236 getregs(cdb,retregs); // retregs will be destroyed 2237 if (sz == 2 * REGSIZE) 2238 { 2239 assert(sz != 16); // no cent support yet 2240 const msreg = findregmsw(retregs); 2241 cdb.gen1(0x0FC8 + (msreg & 7)); // BSWAP msreg 2242 const lsreg = findreglsw(retregs); 2243 cdb.gen1(0x0FC8 + (lsreg & 7)); // BSWAP lsreg 2244 cdb.gen2(0x87,modregrm(3,msreg,lsreg)); // XCHG msreg,lsreg 2245 } 2246 else 2247 { 2248 const reg = findreg(retregs); 2249 if (sz == 2) 2250 { 2251 genregs(cdb,0x86,reg+4,reg); // XCHG regL,regH 2252 } 2253 else 2254 { 2255 assert(sz == 4 || sz == 8); 2256 cdb.gen1(0x0FC8 + (reg & 7)); // BSWAP reg 2257 ubyte rex = 0; 2258 if (sz == 8) 2259 rex |= REX_W; 2260 if (reg & 8) 2261 rex |= REX_B; 2262 if (rex) 2263 code_orrex(cdb.last(), rex); 2264 } 2265 } 2266 fixresult(cdb,e,retregs,pretregs); 2267 } 2268 2269 /************************* 2270 * ?: operator 2271 */ 2272 2273 void cdcond(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2274 { 2275 con_t regconold,regconsave; 2276 uint stackpushold,stackpushsave; 2277 int ehindexold,ehindexsave; 2278 uint sz2; 2279 2280 /* vars to save state of 8087 */ 2281 int stackusedold,stackusedsave; 2282 NDP[global87.stack.length] _8087old; 2283 NDP[global87.stack.length] _8087save; 2284 2285 //printf("cdcond(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs)); 2286 elem *e1 = e.EV.E1; 2287 elem *e2 = e.EV.E2; 2288 elem *e21 = e2.EV.E1; 2289 elem *e22 = e2.EV.E2; 2290 regm_t psw = *pretregs & mPSW; /* save PSW bit */ 2291 const op1 = e1.Eoper; 2292 uint sz1 = tysize(e1.Ety); 2293 uint jop = jmpopcode(e1); 2294 2295 uint jop1 = jmpopcode(e21); 2296 uint jop2 = jmpopcode(e22); 2297 2298 docommas(cdb,&e1); 2299 cgstate.stackclean++; 2300 2301 if (!OTrel(op1) && e1 == e21 && 2302 sz1 <= REGSIZE && !tyfloating(e1.Ety)) 2303 { // Recognize (e ? 
e : f) 2304 2305 code *cnop1 = gennop(null); 2306 regm_t retregs = *pretregs | mPSW; 2307 codelem(cdb,e1,&retregs,false); 2308 2309 cse_flush(cdb,1); // flush CSEs to memory 2310 genjmp(cdb,jop,FLcode,cast(block *)cnop1); 2311 freenode(e21); 2312 2313 regconsave = regcon; 2314 stackpushsave = stackpush; 2315 2316 retregs |= psw; 2317 if (retregs & (mBP | ALLREGS)) 2318 regimmed_set(findreg(retregs),0); 2319 codelem(cdb,e22,&retregs,false); 2320 2321 andregcon(®consave); 2322 assert(stackpushsave == stackpush); 2323 2324 *pretregs = retregs; 2325 freenode(e2); 2326 cdb.append(cnop1); 2327 cgstate.stackclean--; 2328 return; 2329 } 2330 2331 if (OTrel(op1) && sz1 <= REGSIZE && tysize(e2.Ety) <= REGSIZE && 2332 !e1.Ecount && 2333 (jop == JC || jop == JNC) && 2334 (sz2 = tysize(e2.Ety)) <= REGSIZE && 2335 e21.Eoper == OPconst && 2336 e22.Eoper == OPconst 2337 ) 2338 { 2339 uint sz = tysize(e.Ety); 2340 uint rex = (I64 && sz == 8) ? REX_W : 0; 2341 uint grex = rex << 16; 2342 2343 regm_t retregs; 2344 targ_size_t v1,v2; 2345 2346 if (sz2 != 1 || I64) 2347 { 2348 retregs = *pretregs & (ALLREGS | mBP); 2349 if (!retregs) 2350 retregs = ALLREGS; 2351 } 2352 else 2353 { 2354 retregs = *pretregs & BYTEREGS; 2355 if (!retregs) 2356 retregs = BYTEREGS; 2357 } 2358 2359 cdcmp_flag = 1 | rex; 2360 v1 = cast(targ_size_t)e21.EV.Vllong; 2361 v2 = cast(targ_size_t)e22.EV.Vllong; 2362 if (jop == JNC) 2363 { v1 = v2; 2364 v2 = cast(targ_size_t)e21.EV.Vllong; 2365 } 2366 2367 opcode_t opcode = 0x81; 2368 switch (sz2) 2369 { case 1: opcode--; 2370 v1 = cast(byte) v1; 2371 v2 = cast(byte) v2; 2372 break; 2373 2374 case 2: v1 = cast(short) v1; 2375 v2 = cast(short) v2; 2376 break; 2377 2378 case 4: v1 = cast(int) v1; 2379 v2 = cast(int) v2; 2380 break; 2381 default: 2382 break; 2383 } 2384 2385 if (I64 && v1 != cast(targ_ullong)cast(targ_ulong)v1) 2386 { 2387 // only zero-extension from 32-bits is available for 'or' 2388 } 2389 else if (I64 && cast(targ_llong)v2 != 
cast(targ_llong)cast(targ_long)v2) 2390 { 2391 // only sign-extension from 32-bits is available for 'and' 2392 } 2393 else 2394 { 2395 codelem(cdb,e1,&retregs,false); 2396 const reg = findreg(retregs); 2397 2398 if (v1 == 0 && v2 == ~cast(targ_size_t)0) 2399 { 2400 cdb.gen2(0xF6 + (opcode & 1),grex | modregrmx(3,2,reg)); // NOT reg 2401 if (I64 && sz2 == REGSIZE) 2402 code_orrex(cdb.last(), REX_W); 2403 } 2404 else 2405 { 2406 v1 -= v2; 2407 cdb.genc2(opcode,grex | modregrmx(3,4,reg),v1); // AND reg,v1-v2 2408 if (I64 && sz2 == 1 && reg >= 4) 2409 code_orrex(cdb.last(), REX); 2410 if (v2 == 1 && !I64) 2411 cdb.gen1(0x40 + reg); // INC reg 2412 else if (v2 == -1L && !I64) 2413 cdb.gen1(0x48 + reg); // DEC reg 2414 else 2415 { cdb.genc2(opcode,grex | modregrmx(3,0,reg),v2); // ADD reg,v2 2416 if (I64 && sz2 == 1 && reg >= 4) 2417 code_orrex(cdb.last(), REX); 2418 } 2419 } 2420 2421 freenode(e21); 2422 freenode(e22); 2423 freenode(e2); 2424 2425 fixresult(cdb,e,retregs,pretregs); 2426 cgstate.stackclean--; 2427 return; 2428 } 2429 } 2430 2431 if (op1 != OPcond && op1 != OPandand && op1 != OPoror && 2432 op1 != OPnot && op1 != OPbool && 2433 e21.Eoper == OPconst && 2434 sz1 <= REGSIZE && 2435 *pretregs & (mBP | ALLREGS) && 2436 tysize(e21.Ety) <= REGSIZE && !tyfloating(e21.Ety)) 2437 { // Recognize (e ? c : f) 2438 2439 code *cnop1 = gennop(null); 2440 regm_t retregs = mPSW; 2441 jop = jmpopcode(e1); // get jmp condition 2442 codelem(cdb,e1,&retregs,false); 2443 2444 // Set the register with e21 without affecting the flags 2445 retregs = *pretregs & (ALLREGS | mBP); 2446 if (retregs & ~regcon.mvar) 2447 retregs &= ~regcon.mvar; // don't disturb register variables 2448 // NOTE: see my email (sign extension bug? possible fix, some questions 2449 reg_t reg; 2450 regwithvalue(cdb,retregs,cast(targ_size_t)e21.EV.Vllong,®,tysize(e21.Ety) == 8 ? 
64|8 : 8); 2451 retregs = mask(reg); 2452 2453 cse_flush(cdb,1); // flush CSE's to memory 2454 genjmp(cdb,jop,FLcode,cast(block *)cnop1); 2455 freenode(e21); 2456 2457 regconsave = regcon; 2458 stackpushsave = stackpush; 2459 2460 codelem(cdb,e22,&retregs,false); 2461 2462 andregcon(®consave); 2463 assert(stackpushsave == stackpush); 2464 2465 freenode(e2); 2466 cdb.append(cnop1); 2467 fixresult(cdb,e,retregs,pretregs); 2468 cgstate.stackclean--; 2469 return; 2470 } 2471 2472 code *cnop1 = gennop(null); 2473 code *cnop2 = gennop(null); // dummy target addresses 2474 logexp(cdb,e1,false,FLcode,cnop1); // evaluate condition 2475 regconold = regcon; 2476 stackusedold = global87.stackused; 2477 stackpushold = stackpush; 2478 memcpy(_8087old.ptr,global87.stack.ptr,global87.stack.sizeof); 2479 regm_t retregs = *pretregs; 2480 CodeBuilder cdb1; 2481 cdb1.ctor(); 2482 if (psw && jop1 != JNE) 2483 { 2484 retregs &= ~mPSW; 2485 if (!retregs) 2486 retregs = ALLREGS; 2487 codelem(cdb1,e21,&retregs,false); 2488 fixresult(cdb1,e21,retregs,pretregs); 2489 } 2490 else 2491 codelem(cdb1,e21,&retregs,false); 2492 2493 if (CPP && e2.Eoper == OPcolon2) 2494 { 2495 code cs; 2496 2497 // This is necessary so that any cleanup code on one branch 2498 // is redone on the other branch. 
cs.Iop = ESCAPE | ESCmark2;
        cs.Iflags = 0;
        cs.Irex = 0;
        cdb.gen(&cs);
        cdb.append(cdb1);
        cs.Iop = ESCAPE | ESCrelease2;
        cdb.gen(&cs);
    }
    else
        cdb.append(cdb1);

    regconsave = regcon;
    regcon = regconold;

    stackpushsave = stackpush;
    stackpush = stackpushold;

    stackusedsave = global87.stackused;
    global87.stackused = stackusedold;

    memcpy(_8087save.ptr,global87.stack.ptr,global87.stack.sizeof);
    memcpy(global87.stack.ptr,_8087old.ptr,global87.stack.sizeof);

    retregs |= psw;                     // PSW bit may have been trashed
    *pretregs |= psw;
    CodeBuilder cdb2;
    cdb2.ctor();
    if (psw && jop2 != JNE)
    {
        retregs &= ~mPSW;
        if (!retregs)
            retregs = ALLREGS;
        codelem(cdb2,e22,&retregs,false);
        fixresult(cdb2,e22,retregs,pretregs);
    }
    else
        codelem(cdb2,e22,&retregs,false);   // use same regs as E1
    *pretregs = retregs | psw;
    andregcon(&regconold);
    andregcon(&regconsave);
    assert(global87.stackused == stackusedsave);
    assert(stackpush == stackpushsave);
    memcpy(global87.stack.ptr,_8087save.ptr,global87.stack.sizeof);
    freenode(e2);
    genjmp(cdb,JMP,FLcode,cast(block *) cnop2);
    cdb.append(cnop1);
    cdb.append(cdb2);
    cdb.append(cnop2);
    if (*pretregs & mST0)
        note87(e,0,0);

    cgstate.stackclean--;
}

/*********************
 * Comma operator OPcomma
 *
 * The left operand is evaluated with an empty result mask, so its value
 * is discarded; the right operand is then evaluated with the caller's
 * result mask.
 *
 * Params:
 *      cdb      = code builder that receives the generated code
 *      e        = the OPcomma elem; e.EV.E1 is the left leaf, e.EV.E2 the right
 *      pretregs = result register mask, passed unchanged to the
 *                 evaluation of the right leaf
 */

void cdcomma(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs = 0;
    codelem(cdb,e.EV.E1,&retregs,false);   // ignore value from left leaf
    codelem(cdb,e.EV.E2,pretregs,false);   // do right leaf
}


/*********************************
 * Do && and || operators.
*      Generate:
 *              (evaluate e1 and e2, if true goto cnop1)
 *      cnop3:  NOP
 *      cg:     [save reg code]         ;if we must preserve reg
 *              CLR     reg             ;false result (set Z also)
 *              JMP     cnop2
 *
 *      cnop1:  NOP                     ;if e1 evaluates to true
 *              [save reg code]         ;preserve reg
 *
 *              MOV     reg,1           ;true result
 *                  or
 *              CLR     reg             ;if return result in flags
 *              INC     reg
 *
 *      cnop2:  NOP                     ;mark end of code
 *
 * Params:
 *      cdb      = code builder that receives the generated code
 *      e        = the logical elem; e.Eoper == OPoror selects '||' handling,
 *                 any other operator is handled as '&&'
 *      pretregs = requested result registers; 0 means the result is not
 *                 wanted and only the operands are evaluated. Otherwise it is
 *                 overwritten with the register mask that holds the result.
 */

void cdloglog(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* We can trip the assert with the following:
     *    if ( (b<=a) ? (c<b || a<=c) : c>=a )
     * We'll generate ugly code for it, but it's too obscure a case
     * to expend much effort on it.
     * assert(*pretregs != mPSW);
     */

    cgstate.stackclean++;
    code *cnop1 = gennop(null);
    CodeBuilder cdb1;
    cdb1.ctor();
    cdb1.append(cnop1);
    code *cnop3 = gennop(null);
    elem *e2 = e.EV.E2;
    // '||' jumps to cnop1 when e1 is true; '&&' jumps to cnop3 when e1 is false
    (e.Eoper == OPoror)
        ? logexp(cdb,e.EV.E1,1,FLcode,cnop1)
        : logexp(cdb,e.EV.E1,0,FLcode,cnop3);
    con_t regconsave = regcon;
    uint stackpushsave = stackpush;
    if (*pretregs == 0)                 // if don't want result
    {
        int noreturn = !el_returns(e2);
        codelem(cdb,e2,pretregs,false);
        if (noreturn)
        {
            // e2 does not return: keep the pre-e2 register state,
            // only merging in the registers e2 used
            regconsave.used |= regcon.used;
            regcon = regconsave;
        }
        else
            andregcon(&regconsave);
        assert(stackpush == stackpushsave);
        cdb.append(cnop3);
        cdb.append(cdb1);        // eval code, throw away result
        cgstate.stackclean--;
        return;
    }
    code *cnop2 = gennop(null);
    uint sz = tysize(e.Ety);
    if (tybasic(e2.Ety) == TYbool &&
      sz == tysize(e2.Ety) &&
      !(*pretregs & mPSW) &&
      e2.Eoper == OPcall)
    {
        // e2 is a call that already yields a bool of the right size:
        // use its value directly as the result
        codelem(cdb,e2,pretregs,false);

        andregcon(&regconsave);

        // stack depth should not change when evaluating E2
        assert(stackpush == stackpushsave);

        assert(sz <= 4);                                        // result better be int
        regm_t retregs = *pretregs & allregs;
        reg_t reg;
        allocreg(cdb1,&retregs,&reg,TYint);                     // allocate reg for result
        movregconst(cdb1,reg,e.Eoper == OPoror,0);              // reg = 1 for ||, 0 for &&
        regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
        *pretregs = retregs;
        if (e.Eoper == OPoror)
        {
            cdb.append(cnop3);
            genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
            cdb.append(cdb1);
            cdb.append(cnop2);
        }
        else
        {
            genjmp(cdb,JMP,FLcode,cast(block *) cnop2);    // JMP cnop2
            cdb.append(cnop3);
            cdb.append(cdb1);
            cdb.append(cnop2);
        }
        cgstate.stackclean--;
        return;
    }
    logexp(cdb,e2,1,FLcode,cnop1);
    andregcon(&regconsave);

    // stack depth should not change when evaluating E2
    assert(stackpush == stackpushsave);

    assert(sz <= 4);                                        // result better be int
    regm_t retregs = *pretregs & (ALLREGS | mBP);
    if (!retregs)
        retregs = ALLREGS;                                  // if mPSW only
    CodeBuilder cdbcg;
    cdbcg.ctor();
    reg_t reg;
    allocreg(cdbcg,&retregs,&reg,TYint);                    // allocate reg for result
    code *cg = cdbcg.finish();
    for (code *c1 = cg; c1; c1 = code_next(c1))             // for each instruction
        cdb1.gen(c1);                                       // duplicate it
    CodeBuilder cdbcg2;
    cdbcg2.ctor();
    movregconst(cdbcg2,reg,0,*pretregs & mPSW);             // MOV reg,0
    regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
    genjmp(cdbcg2, JMP,FLcode,cast(block *) cnop2);         // JMP cnop2
    movregconst(cdb1,reg,1,*pretregs & mPSW);               // reg = 1
    regcon.immed.mval &= ~mask(reg);                        // mark reg as unavail
    *pretregs = retregs;
    cdb.append(cnop3);
    cdb.append(cg);
    cdb.append(cdbcg2);
    cdb.append(cdb1);
    cdb.append(cnop2);
    cgstate.stackclean--;
    return;
}


/*********************
 * Generate code for shift left or shift right (OPshl,OPshr,OPashr,OProl,OPror).
*/

// Params:
//      cdb      = code builder that receives the generated code
//      e        = the shift/rotate elem; e.EV.E1 is the value being shifted,
//                 e.EV.E2 is the shift count
//      pretregs = requested result registers. 0 means the result is not
//                 wanted (both operands are still evaluated). mPSW is cleared
//                 on the paths where the emitted instructions already set
//                 the flags.
void cdshift(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    reg_t resreg;
    uint shiftcnt;
    regm_t retregs,rretregs;

    //printf("cdshift()\n");
    elem *e1 = e.EV.E1;
    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false); // eval left leaf
        *pretregs = 0;                  // in case they got set
        codelem(cdb,e.EV.E2,pretregs,false);
        return;
    }

    tym_t tyml = tybasic(e1.Ety);
    int sz = _tysize[tyml];
    assert(!tyfloating(tyml));
    OPER oper = e.Eoper;
    uint grex = ((I64 && sz == 8) ? REX_W : 0) << 16;

version (SCPP)
{
    // Do this until the rest of the compiler does OPshr/OPashr correctly
    if (oper == OPshr)
        oper = (tyuns(tyml)) ? OPshr : OPashr;
}

    // s1 is the /reg opcode extension of the shift itself; s2 is the
    // rotate-through-carry used on the other half of a two-register value
    uint s1,s2;
    switch (oper)
    {
        case OPshl:
            s1 = 4;                     // SHL
            s2 = 2;                     // RCL
            break;
        case OPshr:
            s1 = 5;                     // SHR
            s2 = 3;                     // RCR
            break;
        case OPashr:
            s1 = 7;                     // SAR
            s2 = 3;                     // RCR
            break;
        case OProl:
            s1 = 0;                     // ROL
            break;
        case OPror:
            s1 = 1;                     // ROR
            break;
        default:
            assert(0);
    }

    reg_t sreg = NOREG;                         // guard against using value without assigning to sreg
    elem *e2 = e.EV.E2;
    regm_t forccs = *pretregs & mPSW;            // if return result in CCs
    regm_t forregs = *pretregs & (ALLREGS | mBP); // mask of possible return regs
    bool e2isconst = false;                    // assume for the moment
    uint isbyte = (sz == 1);
    switch (e2.Eoper)
    {
        case OPconst:
            e2isconst = true;               // e2 is a constant
            shiftcnt = e2.EV.Vint;         // get shift count
            if ((!I16 && sz <= REGSIZE) ||
                shiftcnt <= 4 ||            // if sequence of shifts
                (sz == 2 &&
                    (shiftcnt == 8 || config.target_cpu >= TARGET_80286)) ||
                (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
               )
            {
                retregs = (forregs) ? forregs
                                    : ALLREGS;
                if (isbyte)
                {   retregs &= BYTEREGS;
                    if (!retregs)
                        retregs = BYTEREGS;
                }
                else if (sz > REGSIZE && sz <= 2 * REGSIZE &&
                         !(retregs & mMSW))
                    retregs |= mMSW & ALLREGS;
                if (s1 == 7)    // if arithmetic right shift
                {
                    if (shiftcnt == 8)
                        retregs = mAX;
                    else if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE)
                        retregs = mDX|mAX;
                }

                if (sz == 2 * REGSIZE && shiftcnt == 8 * REGSIZE &&
                    oper == OPshl &&
                    !e1.Ecount &&
                    (e1.Eoper == OPs16_32 || e1.Eoper == OPu16_32 ||
                     e1.Eoper == OPs32_64 || e1.Eoper == OPu32_64)
                   )
                {   // Handle (shtlng)s << 16
                    regm_t r = retregs & mMSW;
                    codelem(cdb,e1.EV.E1,&r,false);      // eval left leaf
                    regwithvalue(cdb,retregs & mLSW,0,&resreg,0);
                    getregs(cdb,r);
                    retregs = r | mask(resreg);
                    if (forccs)
                    {   sreg = findreg(r);
                        gentstreg(cdb,sreg);
                        *pretregs &= ~mPSW;             // already set
                    }
                    freenode(e1);
                    freenode(e2);
                    break;
                }

                // See if we should use LEA reg,xxx instead of shift
                if (!I16 && shiftcnt >= 1 && shiftcnt <= 3 &&
                    (sz == REGSIZE || (I64 && sz == 4)) &&
                    oper == OPshl &&
                    e1.Eoper == OPvar &&
                    !(*pretregs & mPSW) &&
                    config.flags4 & CFG4speed
                   )
                {
                    reg_t reg;
                    regm_t regm;

                    if (isregvar(e1,&regm,&reg) && !(regm & retregs))
                    {   code cs;
                        allocreg(cdb,&retregs,&resreg,e.Ety);
                        buildEA(&cs,-1,reg,1 << shiftcnt,0);
                        cs.Iop = LEA;
                        code_newreg(&cs,resreg);
                        cs.Iflags = 0;
                        if (I64 && sz == 8)
                            cs.Irex |= REX_W;
                        cdb.gen(&cs);             // LEA resreg,[reg * ss]
                        freenode(e1);
                        freenode(e2);
                        break;
                    }
                }

                codelem(cdb,e1,&retregs,false); // eval left leaf
                //assert((retregs & regcon.mvar) == 0);
                getregs(cdb,retregs);          // modify these regs

                {
                    if (sz == 2 * REGSIZE)
                    {   resreg = findregmsw(retregs);
                        sreg = findreglsw(retregs);
                    }
                    else
                    {   resreg = findreg(retregs);
                        sreg = NOREG;              // an invalid value
                    }
                    if (config.target_cpu >= TARGET_80286 &&
                        sz <= REGSIZE)
                    {
                        // SHL resreg,shiftcnt
                        assert(!(sz == 1 && (mask(resreg) & ~BYTEREGS)));
                        cdb.genc2(0xC1 ^ isbyte,grex | modregxrmx(3,s1,resreg),shiftcnt);
                        if (shiftcnt == 1)
                            cdb.last().Iop += 0x10;     // short form of shift
                        if (I64 && sz == 1 && resreg >= 4)
                            cdb.last().Irex |= REX;
                        // See if we need operand size prefix
                        if (!I16 && oper != OPshl && sz == 2)
                            cdb.last().Iflags |= CFopsize;
                        if (forccs)
                            cdb.last().Iflags |= CFpsw;         // need flags result
                    }
                    else if (shiftcnt == 8)
                    {   if (!(retregs & BYTEREGS) || resreg >= 4)
                        {
                            goto L1;
                        }

                        if (pass != PASSfinal && (!forregs || forregs & (mSI | mDI)))
                        {
                            // e1 might get into SI or DI in a later pass,
                            // so don't put CX into a register
                            getregs(cdb,mCX);
                        }

                        assert(sz == 2);
                        switch (oper)
                        {
                            case OPshl:
                                // MOV regH,regL        XOR regL,regL
                                assert(resreg < 4 && !grex);
                                genregs(cdb,0x8A,resreg+4,resreg);
                                genregs(cdb,0x32,resreg,resreg);
                                break;

                            case OPshr:
                            case OPashr:
                                // MOV regL,regH
                                genregs(cdb,0x8A,resreg,resreg+4);
                                if (oper == OPashr)
                                    cdb.gen1(0x98);           // CBW
                                else
                                    genregs(cdb,0x32,resreg+4,resreg+4); // CLR regH
                                break;

                            case OPror:
                            case OProl:
                                // XCHG regL,regH
                                genregs(cdb,0x86,resreg+4,resreg);
                                break;

                            default:
                                assert(0);
                        }
                        if (forccs)
                            gentstreg(cdb,resreg);
                    }
                    else if (shiftcnt == REGSIZE * 8)   // it's an lword
                    {
                        if (oper == OPshl)
                            swap(&resreg, &sreg);
                        genmovreg(cdb,sreg,resreg);  // MOV sreg,resreg
                        if (oper == OPashr)
                            cdb.gen1(0x99);                       // CWD
                        else
                            movregconst(cdb,resreg,0,0);  // MOV resreg,0
                        if (forccs)
                        {
                            gentstreg(cdb,sreg);
                            *pretregs &= mBP | ALLREGS | mES;
                        }
                    }
                    else
                    {
                        if (oper == OPshl && sz == 2 * REGSIZE)
                            swap(&resreg, &sreg);
                        // Emit shiftcnt single-bit shifts
                        while (shiftcnt--)
                        {
                            cdb.gen2(0xD1 ^ isbyte,modregrm(3,s1,resreg));
                            if (sz == 2 * REGSIZE)
                            {
                                code_orflag(cdb.last(),CFpsw);
                                cdb.gen2(0xD1,modregrm(3,s2,sreg));
                            }
                        }
                        if (forccs)
                            code_orflag(cdb.last(),CFpsw);
                    }
                    if (sz <= REGSIZE)
                        *pretregs &= mBP | ALLREGS;     // flags already set
                }
                freenode(e2);
                break;
            }
            goto default;

        default:
            retregs = forregs & ~mCX;               // CX will be shift count
            if (sz <= REGSIZE)
            {
                if (forregs & ~regcon.mvar && !(retregs & ~regcon.mvar))
                    retregs = ALLREGS & ~mCX;       // need something
                else if (!retregs)
                    retregs = ALLREGS & ~mCX;       // need something
                if (sz == 1)
                {   retregs &= mAX|mBX|mDX;
                    if (!retregs)
                        retregs = mAX|mBX|mDX;
                }
            }
            else
            {
                if (!(retregs & mMSW))
                    retregs = ALLREGS & ~mCX;
            }
            codelem(cdb,e.EV.E1,&retregs,false);     // eval left leaf

            if (sz <= REGSIZE)
                resreg = findreg(retregs);
            else
            {
                resreg = findregmsw(retregs);
                sreg = findreglsw(retregs);
            }
        L1:
            rretregs = mCX;                 // CX is shift count
            if (sz <= REGSIZE)
            {
                scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue
                getregs(cdb,retregs);      // trash these regs
                cdb.gen2(0xD3 ^ isbyte,grex | modregrmx(3,s1,resreg)); // Sxx resreg,CX

                if (!I16 && sz == 2 && (oper == OProl || oper == OPror))
                    cdb.last().Iflags |= CFopsize;

                // Note that a shift by CL does not set the flags if
                // CL == 0. If e2 is a constant, we know it isn't 0
                // (it would have been optimized out).
                if (e2isconst)
                    *pretregs &= mBP | ALLREGS; // flags already set with result
            }
            else if (sz == 2 * REGSIZE &&
                     config.target_cpu >= TARGET_80386)
            {
                reg_t hreg = resreg;
                reg_t lreg = sreg;
                uint rex = I64 ? (REX_W << 16) : 0;
                if (e2isconst)
                {
                    getregs(cdb,retregs);
                    if (shiftcnt & (REGSIZE * 8))
                    {
                        if (oper == OPshr)
                        {   //      SHR hreg,shiftcnt
                            //      MOV lreg,hreg
                            //      XOR hreg,hreg
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt - (REGSIZE * 8));
                            genmovreg(cdb,lreg,hreg);
                            movregconst(cdb,hreg,0,0);
                        }
                        else if (oper == OPashr)
                        {   //      MOV     lreg,hreg
                            //      SAR     hreg,31
                            //      SHRD    lreg,hreg,shiftcnt
                            genmovreg(cdb,lreg,hreg);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),(REGSIZE * 8) - 1);
                            cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt - (REGSIZE * 8));
                        }
                        else
                        {   //      SHL lreg,shiftcnt
                            //      MOV hreg,lreg
                            //      XOR lreg,lreg
                            cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt - (REGSIZE * 8));
                            genmovreg(cdb,hreg,lreg);
                            movregconst(cdb,lreg,0,0);
                        }
                    }
                    else
                    {
                        if (oper == OPshr || oper == OPashr)
                        {   //      SHRD    lreg,hreg,shiftcnt
                            //      SHR/SAR hreg,shiftcnt
                            cdb.genc2(0x0FAC,rex | modregrm(3,hreg,lreg),shiftcnt);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,hreg),shiftcnt);
                        }
                        else
                        {   //      SHLD hreg,lreg,shiftcnt
                            //      SHL  lreg,shiftcnt
                            cdb.genc2(0x0FA4,rex | modregrm(3,lreg,hreg),shiftcnt);
                            cdb.genc2(0xC1,rex | modregrm(3,s1,lreg),shiftcnt);
                        }
                    }
                    freenode(e2);
                }
                else if (config.target_cpu >= TARGET_80486 && REGSIZE == 2)
                {
                    scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
                    getregs(cdb,retregs);          // modify these regs
                    if (oper == OPshl)
                    {
                        /*
                            SHLD    hreg,lreg,CL
                            SHL     lreg,CL
                         */

                        cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
                        cdb.gen2(0xD3,modregrm(3,4,lreg));
                    }
                    else
                    {
                        /*
                            SHRD    lreg,hreg,CL
                            SAR     hreg,CL

                            -- or --

                            SHRD    lreg,hreg,CL
                            SHR     hreg,CL
                         */
                        cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        cdb.gen2(0xD3,modregrm(3,s1,hreg));
                    }
                }
                else
                {   code* cl1,cl2;

                    scodelem(cdb,e2,&rretregs,retregs,false); // get rvalue in CX
                    getregs(cdb,retregs | mCX);     // modify these regs
                                                            // TEST CL,0x20
                    cdb.genc2(0xF6,modregrm(3,0,CX),REGSIZE * 8);
                    cl1 = gennop(null);
                    CodeBuilder cdb1;
                    cdb1.ctor();
                    cdb1.append(cl1);
                    if (oper == OPshl)
                    {
                        /*  TEST    CL,20H
                            JNE     L1
                            SHLD    hreg,lreg,CL
                            SHL     lreg,CL
                            JMP     L2
                        L1: AND     CL,20H-1
                            SHL     lreg,CL
                            MOV     hreg,lreg
                            XOR     lreg,lreg
                        L2: NOP
                         */

                        if (REGSIZE == 2)
                            cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                        cdb1.gen2(0xD3,modregrm(3,4,lreg));
                        genmovreg(cdb1,hreg,lreg);
                        genregs(cdb1,0x31,lreg,lreg);

                        genjmp(cdb,JNE,FLcode,cast(block *)cl1);
                        cdb.gen2(0x0FA5,modregrm(3,lreg,hreg));
                        cdb.gen2(0xD3,modregrm(3,4,lreg));
                    }
                    else
                    {   if (oper == OPashr)
                        {
                            /*  TEST    CL,20H
                                JNE     L1
                                SHRD    lreg,hreg,CL
                                SAR     hreg,CL
                                JMP     L2
                            L1: AND     CL,15
                                MOV     lreg,hreg
                                SAR     hreg,31
                                SHRD    lreg,hreg,CL
                            L2: NOP
                             */

                            if (REGSIZE == 2)
                                cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                            genmovreg(cdb1,lreg,hreg);
                            cdb1.genc2(0xC1,modregrm(3,s1,hreg),31);
                            cdb1.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        }
                        else
                        {
                            /*  TEST    CL,20H
                                JNE     L1
                                SHRD    lreg,hreg,CL
                                SHR     hreg,CL
                                JMP     L2
                            L1: AND     CL,15
                                SHR     hreg,CL
                                MOV     lreg,hreg
                                XOR     hreg,hreg
                            L2: NOP
                             */

                            if (REGSIZE == 2)
                                cdb1.genc2(0x80,modregrm(3,4,CX),REGSIZE * 8 - 1);
                            cdb1.gen2(0xD3,modregrm(3,5,hreg));
                            genmovreg(cdb1,lreg,hreg);
                            genregs(cdb1,0x31,hreg,hreg);
                        }
                        genjmp(cdb,JNE,FLcode,cast(block *)cl1);
                        cdb.gen2(0x0FAD,modregrm(3,hreg,lreg));
                        cdb.gen2(0xD3,modregrm(3,s1,hreg));
                    }
                    cl2 = gennop(null);
                    genjmp(cdb,JMPS,FLcode,cast(block *)cl2);
                    cdb.append(cdb1);
                    cdb.append(cl2);
                }
                break;
            }
            else if (sz == 2 * REGSIZE)
            {
                // Two-register value on a pre-386 target: loop of
                // single-bit shifts counted down in CX
                scodelem(cdb,e2,&rretregs,retregs,false);
                getregs(cdb,retregs | mCX);
                if (oper == OPshl)
                    swap(&resreg, &sreg);
                if (!e2isconst)                   // if not sure shift count != 0
                    cdb.genc2(0xE3,0,6);          // JCXZ .+6
                cdb.gen2(0xD1,modregrm(3,s1,resreg));
                code_orflag(cdb.last(),CFtarg2);
                cdb.gen2(0xD1,modregrm(3,s2,sreg));
                cdb.genc2(0xE2,0,cast(targ_uns)-6);          // LOOP .-6
                regimmed_set(CX,0);         // note that now CX == 0
            }
            else
                assert(0);
            break;
    }
    fixresult(cdb,e,retregs,pretregs);
}


/***************************
 * Perform a 'star' reference (indirection).
 *
 * The addressing mode for e is obtained with getlvalue(); the value is
 * then either tested (when only mPSW is requested), pushed on the stack
 * (8 byte value requested in mSTACK only), or loaded into registers.
 * Floating point cases that want ST0 or flags are handed to cdind87(),
 * and complex types to cload87(), when config.inline8087 is set.
 *
 * Params:
 *      cdb      = code builder that receives the generated code
 *      e        = the indirection elem; e.EV.E1 must not be null
 *      pretregs = requested result registers. If 0, nothing is loaded
 *                 unless e.Ety has mTYvolatile set, in which case the
 *                 mask from regmask() is used so the load still happens.
 */

void cdind(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;

    //printf("cdind(e = %p, *pretregs = %s)\n",e,regm_str(*pretregs));
    tym_t tym = tybasic(e.Ety);
    if (tyfloating(tym))
    {
        if (config.inline8087)
        {
            if (*pretregs & mST0)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
            if (I64 && tym == TYcfloat && *pretregs & (ALLREGS | mBP))
            { }
            else if (tycomplex(tym))
            {
                cload87(cdb, e, pretregs);
                return;
            }

            if (*pretregs & mPSW)
            {
                cdind87(cdb, e, pretregs);
                return;
            }
        }
    }

    elem *e1 = e.EV.E1;
    assert(e1);
    switch (tym)
    {
        case TYstruct:
        case TYarray:
            // This case should never happen, why is it here?
            tym = TYnptr;               // don't confuse allocreg()
            if (*pretregs & (mES | mCX) || e.Ety & mTYfar)
                    tym = TYfptr;
            break;

        default:
            break;
    }
    uint sz = _tysize[tym];
    uint isbyte = tybyte(tym) != 0;

    code cs;

    getlvalue(cdb,&cs,e,RMload);          // get addressing mode
    //printf("Irex = %02x, Irm = x%02x, Isib = x%02x\n", cs.Irex, cs.Irm, cs.Isib);
    //fprintf(stderr,"cd2 :\n"); WRcodlst(c);
    if (*pretregs == 0)
    {
        if (e.Ety & mTYvolatile)               // do the load anyway
            *pretregs = regmask(e.Ety, 0);     // load into registers
        else
            return;
    }

    regm_t idxregs = idxregm(&cs);               // mask of index regs used

    if (*pretregs == mPSW)
    {
        if (!I16 && tym == TYfloat)
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYfloat);
            cs.Iop = 0x8B;
            code_newreg(&cs,reg);
            cdb.gen(&cs);                       // MOV reg,lsw
            cdb.gen2(0xD1,modregrmx(3,4,reg));  // SHL reg,1
            code_orflag(cdb.last(), CFpsw);
        }
        else if (sz <= REGSIZE)
        {
            cs.Iop = 0x81 ^ isbyte;
            cs.Irm |= modregrm(0,7,0);
            cs.IFL2 = FLconst;
            cs.IEV2.Vsize_t = 0;
            cdb.gen(&cs);             // CMP [idx],0
        }
        else if (!I16 && sz == REGSIZE + 2)      // if far pointer
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYint);
            cs.Iop = MOVZXw;
            cs.Irm |= modregrm(0,reg,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);             // MOVZX reg,msw
            goto L4;
        }
        else if (sz <= 2 * REGSIZE)
        {
            retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&reg,TYint);
            cs.Iop = 0x8B;
            code_newreg(&cs,reg);
            getlvalue_msw(&cs);
            cdb.gen(&cs);             // MOV reg,msw
            if (I32)
            {   if (tym == TYdouble || tym == TYdouble_alias)
                    cdb.gen2(0xD1,modregrm(3,4,reg)); // SHL reg,1
            }
            else if (tym == TYfloat)
                cdb.gen2(0xD1,modregrm(3,4,reg));     // SHL reg,1
        L4:
            cs.Iop = 0x0B;
            getlvalue_lsw(&cs);
            cs.Iflags |= CFpsw;
            cdb.gen(&cs);                    // OR reg,lsw
        }
        else if (!I32 && sz == 8)
        {
            *pretregs |= DOUBLEREGS_16;     // fake it for now
            goto L1;
        }
        else
        {
            debug WRTYxx(tym);
            assert(0);
        }
    }
    else                                // else return result in reg
    {
    L1:
        retregs = *pretregs;
        if (sz == 8 &&
            (retregs & (mPSW | mSTACK | ALLREGS | mBP)) == mSTACK)
        {   int i;

            // Optimizer should not CSE these, as the result is worse code!
            assert(!e.Ecount);

            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += 8 - REGSIZE;
            stackchanged = 1;
            i = 8 - REGSIZE;
            // Push REGSIZE-sized pieces, highest offset first
            do
            {
                cdb.gen(&cs);                         // PUSH EA+i
                cdb.genadjesp(REGSIZE);
                cs.IEV1.Voffset -= REGSIZE;
                stackpush += REGSIZE;
                i -= REGSIZE;
            }
            while (i >= 0);
            goto L3;
        }
        if (I16 && sz == 8)
            retregs = DOUBLEREGS_16;

        // Watch out for loading an lptr from an lptr! We must have
        // the offset loaded into a different register.
        /*if (retregs & mES && (cs.Iflags & CFSEG) == CFes)
                retregs = ALLREGS;*/

        {
            assert(!isbyte || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,tym); // alloc registers
        }
        if (retregs & XMMREGS)
        {
            assert(sz == 4 || sz == 8 || sz == 16 || sz == 32); // float, double or vector
            cs.Iop = xmmload(tym);
            cs.Irex &= ~REX_W;
            code_newreg(&cs,reg - XMM0);
            checkSetVex(&cs,tym);
            cdb.gen(&cs);     // MOV reg,[idx]
        }
        else if (sz <= REGSIZE)
        {
            cs.Iop = 0x8B;                                  // MOV
            if (sz <= 2 && !I16 &&
                config.target_cpu >= TARGET_PentiumPro && config.flags4 & CFG4speed)
            {
                cs.Iop = tyuns(tym) ? MOVZXw : MOVSXw;      // MOVZX/MOVSX
                cs.Iflags &= ~CFopsize;
            }
            cs.Iop ^= isbyte;
        L2:
            code_newreg(&cs,reg);
            cdb.gen(&cs);     // MOV reg,[idx]
            if (isbyte && reg >= 4)
                code_orrex(cdb.last(), REX);
        }
        else if ((tym == TYfptr || tym == TYhptr) && retregs & mES)
        {
            cs.Iop = 0xC4;          // LES reg,[idx]
            goto L2;
        }
        else if (sz <= 2 * REGSIZE)
        {   uint lsreg;

            cs.Iop = 0x8B;
            // Be careful not to interfere with index registers
            if (!I16)
            {
                // Can't handle if both result registers are used in
                // the addressing mode.
                if ((retregs & idxregs) == retregs)
                {
                    retregs = mMSW & allregs & ~idxregs;
                    if (!retregs)
                        retregs |= mCX;
                    retregs |= mLSW & ~idxregs;

                    // We can run out of registers, so if that's possible,
                    // give us *one* of the idxregs
                    if ((retregs & ~regcon.mvar & mLSW) == 0)
                    {
                        regm_t x = idxregs & mLSW;
                        if (x)
                            retregs |= mask(findreg(x));        // give us one idxreg
                    }
                    else if ((retregs & ~regcon.mvar & mMSW) == 0)
                    {
                        regm_t x = idxregs & mMSW;
                        if (x)
                            retregs |= mask(findreg(x));        // give us one idxreg
                    }

                    allocreg(cdb,&retregs,&reg,tym);     // alloc registers
                    assert((retregs & idxregs) != retregs);
                }

                lsreg = findreglsw(retregs);
                if (mask(reg) & idxregs)                // reg is in addr mode
                {
                    // Load the half whose register is NOT part of the
                    // address first, so the address stays valid
                    code_newreg(&cs,lsreg);
                    cdb.gen(&cs);                 // MOV lsreg,lsw
                    if (sz == REGSIZE + 2)
                        cs.Iflags |= CFopsize;
                    lsreg = reg;
                    getlvalue_msw(&cs);                 // MOV reg,msw
                }
                else
                {
                    code_newreg(&cs,reg);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);                 // MOV reg,msw
                    if (sz == REGSIZE + 2)
                        cdb.last().Iflags |= CFopsize;
                    getlvalue_lsw(&cs);                 // MOV lsreg,lsw
                }
                NEWREG(cs.Irm,lsreg);
                cdb.gen(&cs);
            }
            else
            {
                // Index registers are always the lsw!
                cs.Irm |= modregrm(0,reg,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);     // MOV reg,msw
                lsreg = findreglsw(retregs);
                NEWREG(cs.Irm,lsreg);
                getlvalue_lsw(&cs);     // MOV lsreg,lsw
                cdb.gen(&cs);
            }
        }
        else if (I16 && sz == 8)
        {
            assert(reg == AX);
            cs.Iop = 0x8B;
            cs.IEV1.Voffset += 6;
            cdb.gen(&cs);             // MOV AX,EA+6
            cs.Irm |= modregrm(0,CX,0);
            cs.IEV1.Voffset -= 4;
            cdb.gen(&cs);                    // MOV CX,EA+2
            NEWREG(cs.Irm,DX);
            cs.IEV1.Voffset -= 2;
            cdb.gen(&cs);                    // MOV DX,EA
            cs.IEV1.Voffset += 4;
            NEWREG(cs.Irm,BX);
            cdb.gen(&cs);                    // MOV BX,EA+4
        }
        else
            assert(0);
    L3:
        fixresult(cdb,e,retregs,pretregs);
    }
    //fprintf(stderr,"cdafter :\n"); WRcodlst(c);
}



/********************************
 * Generate code to load ES with the right segment value.
 *
 * ES is loaded with a PUSH segreg / POP ES pair:
 *      TYnptr: from DS, unless CFG3eseqds is set in config.flags3
 *      TYcptr: from CS
 *      TYsptr: from SS, if WFssneds is set in config.wflags or
 *              CFG3eseqds is not set in config.flags3
 * Nothing is generated for any other type (the original note names far
 * pointers as the case where nothing needs doing).
 *
 * Params:
 *      ty = pointer type; only tybasic(ty) is examined
 * Returns:
 *      the generated code (result of CodeBuilder.finish()), or null
 *      when config.exe has EX_flat set
 */

private code *cod2_setES(tym_t ty)
{
    if (config.exe & EX_flat)
        return null;

    int push;

    CodeBuilder cdb;
    cdb.ctor();
    switch (tybasic(ty))
    {
        case TYnptr:
            if (!(config.flags3 & CFG3eseqds))
            {   push = 0x1E;            // PUSH DS
                goto L1;
            }
            break;
        case TYcptr:
            push = 0x0E;                // PUSH CS
            goto L1;
        case TYsptr:
            if ((config.wflags & WFssneds) || !(config.flags3 & CFG3eseqds))
            {   push = 0x16;            // PUSH SS
            L1:
                // Must load ES
                getregs(cdb,mES);
                cdb.gen1(push);
                cdb.gen1(0x07);         // POP ES
            }
            break;

        default:
            break;
    }
    return cdb.finish();
}

/********************************
 * Generate code for intrinsic strlen().
*/

void cdstrlen(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Inline strlen; the length ends up in CX:
            LES     DI,e1
            CLR     AX                      ;scan for 0
            MOV     CX,-1                   ;largest possible string
            REPNE   SCASB
            NOT     CX
            DEC     CX
     */

    elem *estr = e.EV.E1;
    const tym_t tystr = estr.Ety;
    const ubyte rex = I64 ? REX_W : 0;

    // The string pointer is wanted in DI, and also in ES when tyreg()
    // does not accept its type
    regm_t strregs = tyreg(tystr) ? mDI : (mDI | mES);
    codelem(cdb,estr,&strregs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(tystr));

    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);                                // MOV AL,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mDI|mCX);
    cdb.gen1(0xF2);                                         // REPNE
    cdb.gen1(0xAE);                                         // SCASB
    genregs(cdb,0xF7,2,CX);                                 // NOT CX
    code_orrex(cdb.last(), rex);
    if (!I64)
        cdb.gen1(0x48 + CX);                                // DEC CX
    else
        cdb.gen2(0xFF,(rex << 16) | modregrm(3,1,CX));      // DEC reg

    // If the caller wants flags, mark the final DEC as producing them
    // and drop mPSW from the request
    const bool wantFlags = (*pretregs & mPSW) != 0;
    if (wantFlags)
    {
        cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;
    }
    fixresult(cdb,e,mCX,pretregs);
}


/*********************************
 * Generate code for strcmp(s1,s2) intrinsic.
*/

void cdstrcmp(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Instruction sequence being generated:
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     CX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        PUSH    DS
        MOV     DS,CX
        CLR     AX                      ;scan for 0
        MOV     CX,-1                   ;largest possible string
        REPNE   SCASB
        NOT     CX                      ;CX = string length of s2
        SUB     DI,CX                   ;point DI back to beginning
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
     */

    // s1 is wanted in SI, plus CX when tyreg() does not accept its type
    regm_t s1regs = mSI;
    const tym_t tys1 = e.EV.E1.Ety;
    if (!tyreg(tys1))
        s1regs |= mCX;
    codelem(cdb,e.EV.E1,&s1regs,false);

    // s2 is wanted in DI, plus ES when tyreg() does not accept its type
    regm_t s2regs = mDI;
    const tym_t tys2 = e.EV.E2.Ety;
    if (!tyreg(tys2))
        s2regs |= mES;
    scodelem(cdb,e.EV.E2,&s2regs,s1regs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(tys2));
    getregs_imm(cdb,mAX | mCX);

    const ubyte rex = I64 ? REX_W : 0;

    // Load DS with right value
    bool restoreDS;                     // true if DS was pushed and must be popped
    int seg;
    switch (tybasic(tys1))
    {
        case TYnptr:
        case TYimmutPtr:
            restoreDS = false;
            break;

        case TYsptr:
            // if sptr can't use DS segment, take SS
            seg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            goto LloadDS;
        case TYcptr:
            seg = SEG_CS;
        LloadDS:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen1(0x06 + (seg << 3));            // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
            restoreDS = true;
            break;
        case TYfptr:
        case TYvptr:
        case TYhptr:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen2(0x8E,modregrm(3,SEG_DS,CX));   // MOV DS,CX
            restoreDS = true;
            break;
        default:
            assert(0);
    }

    movregconst(cdb,AX,0,0);                                // MOV AX,0
    movregconst(cdb,CX,-cast(targ_size_t)1,I64 ? 64 : 0);   // MOV CX,-1
    getregs(cdb,mSI|mDI|mCX);
    cdb.gen1(0xF2);                                         // REPNE
    cdb.gen1(0xAE);                                         // SCASB
    genregs(cdb,0xF7,2,CX);                                 // NOT CX
    code_orrex(cdb.last(),rex);
    genregs(cdb,0x2B,DI,CX);                                // SUB DI,CX
    code_orrex(cdb.last(),rex);
    cdb.gen1(0xF3);                                         // REPE
    cdb.gen1(0xA6);                                         // CMPSB
    if (restoreDS)
        cdb.gen1(0x1F);                                     // POP DS
    code *lequal = gennop(null);        // the L1 of the sequence above
    if (*pretregs != mPSW)                                  // if not flags only
    {
        genjmp(cdb,JE,FLcode,cast(block *) lequal);         // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                            // SBB AX,AX
        code_orrex(cdb.last(),rex);
        cdb.genc2(0x81,(rex << 16) | modregrm(3,3,AX),cast(targ_uns)-1);   // SBB AX,-1
    }

    *pretregs &= ~mPSW;
    cdb.append(lequal);
    fixresult(cdb,e,mAX,pretregs);
}

/*********************************
 * Generate code for memcmp(s1,s2,n) intrinsic.
 */

void cdmemcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Instruction sequence being generated:
        MOV     SI,s1                   ;get destination pointer (s1)
        MOV     DX,s1+2
        LES     DI,s2                   ;get source pointer (s2)
        MOV     CX,n                    ;get number of bytes to compare
        PUSH    DS
        MOV     DS,DX
        XOR     AX,AX
        REPE    CMPSB                   ;compare string
        POP     DS
        JE      L1                      ;strings are equal
        SBB     AX,AX
        SBB     AX,-1
    L1:
     */

    // e.EV.E1 is an OPparam pairing s1 and s2; e.EV.E2 is the byte count
    elem *eparam = e.EV.E1;
    assert(eparam.Eoper == OPparam);

    // Get s1 into DX:SI
    regm_t s1regs = mSI;
    const tym_t tys1 = eparam.EV.E1.Ety;
    if (!tyreg(tys1))
        s1regs |= mDX;
    codelem(cdb,eparam.EV.E1,&s1regs,false);

    // Get s2 into ES:DI
    regm_t s2regs = mDI;
    const tym_t tys2 = eparam.EV.E2.Ety;
    if (!tyreg(tys2))
        s2regs |= mES;
    scodelem(cdb,eparam.EV.E2,&s2regs,s1regs,false);
    freenode(eparam);

    // Get nbytes into CX
    regm_t cntregs = mCX;
    scodelem(cdb,e.EV.E2,&cntregs,s2regs | s1regs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(tys2));

    // Load DS with right value
    bool restoreDS;                     // true if DS was pushed and must be popped
    int seg;
    switch (tybasic(tys1))
    {
        case TYnptr:
        case TYimmutPtr:
            restoreDS = false;
            break;

        case TYsptr:
            // if sptr can't use DS segment, take SS
            seg = (config.wflags & WFssneds) ? SEG_SS : SEG_DS;
            goto LloadDS;
        case TYcptr:
            seg = SEG_CS;
        LloadDS:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen1(0x06 + (seg << 3));            // PUSH segreg
            cdb.gen1(0x1F);                         // POP  DS
            restoreDS = true;
            break;
        case TYfptr:
        case TYvptr:
        case TYhptr:
            cdb.gen1(0x1E);                         // PUSH DS
            cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));   // MOV DS,DX
            restoreDS = true;
            break;
        default:
            assert(0);
    }

static if (1)
{
    getregs(cdb,mAX);
    cdb.gen2(0x33,modregrm(3,AX,AX));           // XOR AX,AX
    code_orflag(cdb.last(), CFpsw);             // keep flags
}
else
{
    if (*pretregs != mPSW)                      // if not flags only
        regwithvalue(cdb,mAX,0,null,0);         // put 0 in AX
}

    getregs(cdb,mCX | mSI | mDI);
    cdb.gen1(0xF3);                             // REPE
    cdb.gen1(0xA6);                             // CMPSB
    if (restoreDS)
        cdb.gen1(0x1F);                         // POP DS
    if (*pretregs != mPSW)                      // if not flags only
    {
        code *lequal = gennop(null);            // the L1 of the sequence above
        genjmp(cdb,JE,FLcode,cast(block *) lequal);     // JE L1
        getregs(cdb,mAX);
        genregs(cdb,0x1B,AX,AX);                // SBB AX,AX
        cdb.genc2(0x81,modregrm(3,3,AX),cast(targ_uns)-1);    // SBB AX,-1
        cdb.append(lequal);
    }

    *pretregs &= ~mPSW;
    fixresult(cdb,e,mAX,pretregs);
}

/*********************************
 * Generate code for strcpy(s1,s2) intrinsic.
 *
 * e.EV.E1 is s1 (the destination) and e.EV.E2 is s2 (the source).
 * The length of s2 including its terminating 0 is found with
 * REPNE SCASB, then that many bytes are copied with REP MOVSB.
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = the strcpy elem
 *      pretregs = requested result registers; when non-zero, DI (s1) is
 *                 copied to AX before the copy and returned via fixresult()
 */

void cdstrcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    char need_DS;       // set when DS was pushed and must be restored
    int segreg;         // segment register holding the segment of s2

    /*
        LES     DI,s2           ;ES:DI = s2
        CLR     AX              ;scan for 0
        MOV     CX,-1           ;largest possible string
        REPNE   SCASB           ;find end of s2
        NOT     CX              ;CX = strlen(s2) + 1 (for EOS)
        SUB     DI,CX
        MOV     SI,DI
        PUSH    DS
        PUSH    ES
        LES     DI,s1
        POP     DS
        MOV     AX,DI           ;return value is s1
        REP     MOVSB
        POP     DS
    */

    stackchanged = 1;
    regm_t retregs = mDI;
    tym_t ty2 = tybasic(e.EV.E2.Ety);
    if (!tyreg(ty2))
        retregs |= mES;
    ubyte rex = I64 ? REX_W : 0;
    codelem(cdb,e.EV.E2,&retregs,false);

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty2));
    getregs_imm(cdb,mAX | mCX);
    movregconst(cdb,AX,0,1);       // MOV AL,0
    movregconst(cdb,CX,-1,I64?64:0);  // MOV CX,-1
    getregs(cdb,mAX|mCX|mSI|mDI);
    cdb.gen1(0xF2);                // REPNE
    cdb.gen1(0xAE);                // SCASB
    genregs(cdb,0xF7,2,CX);        // NOT CX
    code_orrex(cdb.last(),rex);
    genregs(cdb,0x2B,DI,CX);       // SUB DI,CX
    code_orrex(cdb.last(),rex);
    genmovreg(cdb,SI,DI);          // MOV SI,DI

    // Load DS with right value
    // (only the two PUSHes are emitted here; the matching POPs follow
    // once s1 has been loaded)
    switch (ty2)
    {
        case TYnptr:
        case TYimmutPtr:
            need_DS = false;
            break;

        case TYsptr:
            if (config.wflags & WFssneds)       // if sptr can't use DS segment
                segreg = SEG_SS;
            else
                segreg = SEG_DS;
            goto L1;
        case TYcptr:
            segreg = SEG_CS;
        L1:
            cdb.gen1(0x1E);                     // PUSH DS
            cdb.gen1(0x06 + (segreg << 3));     // PUSH segreg
            cdb.genadjesp(REGSIZE * 2);
            need_DS = true;
            break;
        case TYfptr:
        case TYvptr:
        case TYhptr:
            segreg = SEG_ES;
            goto L1;

        default:
            assert(0);
    }

    // Get s1 into ES:DI, keeping CX (length) and SI (source offset) intact
    retregs = mDI;
    tym_t ty1 = tybasic(e.EV.E1.Ety);
    if (!tyreg(ty1))
        retregs |= mES;
    scodelem(cdb,e.EV.E1,&retregs,mCX|mSI,false);
    getregs(cdb,mAX|mCX|mSI|mDI);

    // Make sure ES contains proper segment value
    if (ty2 != TYnptr || ty1 != ty2)
        cdb.append(cod2_setES(ty1));
    else
    {}                              // ES is already same as DS

    if (need_DS)
        cdb.gen1(0x1F);                     // POP DS (the pushed segreg)
    if (*pretregs)
        genmovreg(cdb,AX,DI);               // MOV AX,DI
    cdb.gen1(0xF3);                         // REP
    cdb.gen1(0xA4);                         // MOVSB

    if (need_DS)
    {   cdb.gen1(0x1F);                     // POP DS (the original DS)
        cdb.genadjesp(-(REGSIZE * 2));
    }
    fixresult(cdb,e,mAX | mES,pretregs);
}

/*********************************
 * Generate code for memcpy(s1,s2,n) intrinsic.
 *  OPmemcpy
 *   /   \
 * s1   OPparam
 *       /   \
 *      s2    n
 *
 * s2 is evaluated into [DX:]SI, n into CX and s1 into [ES:]DI, then the
 * bytes are copied with REP MOVSB. When n is not a constant, a
 * TEST CX,CX / JZ pair skips the copy for a zero count.
 * Params:
 *      cdb = code sink the generated instructions are appended to
 *      e = the memcpy elem
 *      pretregs = requested result registers; when non-zero, DI (s1) is
 *                 copied to AX before the copy and returned via fixresult()
 */

void cdmemcpy(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    char need_DS;       // set when DS was pushed and must be popped after the copy
    int segreg;         // segment register to copy into DS (TYsptr/TYcptr cases)

    /*
        MOV     SI,s2
        MOV     DX,s2+2
        MOV     CX,n
        LES     DI,s1
        PUSH    DS
        MOV     DS,DX
        MOV     AX,DI                   ;return value is s1
        REP     MOVSB
        POP     DS
    */

    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);

    // Get s2 into DX:SI
    regm_t retregs2 = mSI;
    tym_t ty2 = e2.EV.E1.Ety;
    if (!tyreg(ty2))
        retregs2 |= mDX;
    codelem(cdb,e2.EV.E1,&retregs2,false);

    // Need to check if nbytes is 0 (OPconst of 0 would have been removed by elmemcpy())
    const zeroCheck = e2.EV.E2.Eoper != OPconst;

    // Get nbytes into CX
    regm_t retregs3 = mCX;
    scodelem(cdb,e2.EV.E2,&retregs3,retregs2,false);
    freenode(e2);

    // Get s1 into ES:DI
    regm_t retregs1 = mDI;
    tym_t ty1 = e.EV.E1.Ety;
    if (!tyreg(ty1))
        retregs1 |= mES;
    scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false);

    ubyte rex = I64 ? REX_W : 0;

    // Make sure ES contains proper segment value
    cdb.append(cod2_setES(ty1));

    // Load DS with right value
    switch (tybasic(ty2))
    {
        case TYnptr:
        case TYimmutPtr:
            need_DS = false;
            break;

        case TYsptr:
            if (config.wflags & WFssneds)       // if sptr can't use DS segment
                segreg = SEG_SS;
            else
                segreg = SEG_DS;
            goto L1;

        case TYcptr:
            segreg = SEG_CS;
        L1:
            cdb.gen1(0x1E);                        // PUSH DS
            cdb.gen1(0x06 + (segreg << 3));        // PUSH segreg
            cdb.gen1(0x1F);                        // POP DS
            need_DS = true;
            break;

        case TYfptr:
        case TYvptr:
        case TYhptr:
            cdb.gen1(0x1E);                        // PUSH DS
            cdb.gen2(0x8E,modregrm(3,SEG_DS,DX));  // MOV DS,DX
            need_DS = true;
            break;

        default:
            assert(0);
    }

    if (*pretregs)                              // if need return value
    {   getregs(cdb,mAX);
        genmovreg(cdb,AX,DI);                   // MOV AX,DI
    }

    // The leading 0 disables this branch; it is never taken.
    if (0 && I32 && config.flags4 & CFG4speed)
    {
        /* This is only faster if the memory is dword aligned, if not
         * it is significantly slower than just a rep movsb.
         */
        /*      mov     EDX,ECX
         *      shr     ECX,2
         *      jz      L1
         *      repe    movsd
         * L1:  nop
         *      and     EDX,3
         *      jz      L2
         *      mov     ECX,EDX
         *      repe    movsb
         * L2:  nop
         */
        getregs(cdb,mSI | mDI | mCX | mDX);
        genmovreg(cdb,DX,CX);                   // MOV EDX,ECX
        cdb.genc2(0xC1,modregrm(3,5,CX),2);     // SHR ECX,2
        code *cx = gennop(null);
        genjmp(cdb, JE, FLcode, cast(block *)cx);  // JZ L1
        cdb.gen1(0xF3);                         // REPE
        cdb.gen1(0xA5);                         // MOVSD
        cdb.append(cx);
        cdb.genc2(0x81, modregrm(3,4,DX),3);    // AND EDX,3

        code *cnop = gennop(null);
        genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ L2
        genmovreg(cdb,CX,DX);                   // MOV ECX,EDX
        cdb.gen1(0xF3);                         // REPE
        cdb.gen1(0xA4);                         // MOVSB
        cdb.append(cnop);
    }
    else
    {
        getregs(cdb,mSI | mDI | mCX);
        code* cnop;                             // only assigned and used when zeroCheck
        if (zeroCheck)
        {
            cnop = gennop(null);
            gentstreg(cdb,CX);                  // TEST ECX,ECX
            if (I64)
                code_orrex(cdb.last, REX_W);
            genjmp(cdb, JE, FLcode, cast(block *)cnop);  // JZ cnop
        }

        if (I16 && config.flags4 & CFG4speed)          // if speed optimization
        {
            // Note this doesn't work if CX is 0
            cdb.gen2(0xD1,(rex << 16) | modregrm(3,5,CX));        // SHR CX,1
            cdb.gen1(0xF3);                              // REPE
            cdb.gen1(0xA5);                              // MOVSW
            cdb.gen2(0x11,(rex << 16) | modregrm(3,CX,CX));            // ADC CX,CX
        }
        cdb.gen1(0xF3);                             // REPE
        cdb.gen1(0xA4);                             // MOVSB
        if (zeroCheck)
            cdb.append(cnop);
        if (need_DS)
            cdb.gen1(0x1F);                         // POP DS
    }
    fixresult(cdb,e,mES|mAX,pretregs);
}


/*********************************
 * Generate code for memset(s,value,numbytes) intrinsic.
4066 * (s OPmemset (numbytes OPparam value)) 4067 */ 4068 4069 void cdmemset(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 4070 { 4071 regm_t retregs1; 4072 regm_t retregs3; 4073 reg_t reg; 4074 reg_t vreg; 4075 tym_t ty1; 4076 int segreg; 4077 targ_uns numbytes; 4078 uint m; 4079 4080 //printf("cdmemset(*pretregs = %s)\n", regm_str(*pretregs)); 4081 elem *e2 = e.EV.E2; 4082 assert(e2.Eoper == OPparam); 4083 4084 elem* evalue = e2.EV.E2; 4085 elem* enumbytes = e2.EV.E1; 4086 4087 const grex = I64 ? (REX_W << 16) : 0; 4088 4089 bool valueIsConst = false; 4090 targ_size_t value; 4091 if (evalue.Eoper == OPconst) 4092 { 4093 value = el_tolong(evalue) & 0xFF; 4094 value |= value << 8; 4095 if (I32 || I64) 4096 { 4097 value |= value << 16; 4098 static if (value.sizeof == 8) 4099 if (I64) 4100 value |= value << 32; 4101 } 4102 valueIsConst = true; 4103 } 4104 else if (evalue.Eoper == OPstrpar) // happens if evalue is a struct of 0 size 4105 { 4106 value = 0; 4107 valueIsConst = true; 4108 } 4109 else 4110 value = 0xDEADBEEF; // stop annoying false positives that value is not inited 4111 4112 if (enumbytes.Eoper == OPconst) 4113 { 4114 static uint REP_THRESHOLD() { return REGSIZE * (6 + (REGSIZE == 4)); } 4115 numbytes = cast(uint)cast(targ_size_t)el_tolong(enumbytes); 4116 if (numbytes <= REP_THRESHOLD && 4117 !I16 && // doesn't work for 16 bits 4118 valueIsConst) 4119 { 4120 targ_uns offset = 0; 4121 retregs1 = *pretregs; 4122 if (!retregs1) 4123 retregs1 = ALLREGS; 4124 codelem(cdb,e.EV.E1,&retregs1,false); 4125 reg = findreg(retregs1); 4126 if (evalue.Eoper == OPconst) 4127 { 4128 const uint mrm = buildModregrm(0,0,reg); 4129 switch (numbytes) 4130 { 4131 case 4: // MOV [reg],imm32 4132 cdb.genc2(0xC7,mrm,value); 4133 goto fixres; 4134 case 2: // MOV [reg],imm16 4135 cdb.genc2(0xC7,mrm,value); 4136 cdb.last().Iflags = CFopsize; 4137 goto fixres; 4138 case 1: // MOV [reg],imm8 4139 cdb.genc2(0xC6,mrm,value); 4140 goto fixres; 4141 4142 default: 4143 break; 4144 } 4145 
} 4146 4147 regwithvalue(cdb, BYTEREGS & ~retregs1, value, &vreg, I64 ? 64 : 0); 4148 freenode(evalue); 4149 freenode(e2); 4150 4151 m = grex | buildModregrm(2,vreg,reg); 4152 while (numbytes >= REGSIZE) 4153 { // MOV dword ptr offset[reg],vreg 4154 cdb.gen2(0x89,m); 4155 cdb.last().IEV1.Voffset = offset; 4156 cdb.last().IFL1 = FLconst; 4157 numbytes -= REGSIZE; 4158 offset += REGSIZE; 4159 } 4160 m &= ~grex; 4161 if (numbytes & 4) 4162 { // MOV dword ptr offset[reg],vreg 4163 cdb.gen2(0x89,m); 4164 cdb.last().IEV1.Voffset = offset; 4165 cdb.last().IFL1 = FLconst; 4166 offset += 4; 4167 } 4168 if (numbytes & 2) 4169 { // MOV word ptr offset[reg],vreg 4170 cdb.gen2(0x89,m); 4171 cdb.last().IEV1.Voffset = offset; 4172 cdb.last().IFL1 = FLconst; 4173 cdb.last().Iflags = CFopsize; 4174 offset += 2; 4175 } 4176 if (numbytes & 1) 4177 { // MOV byte ptr offset[reg],vreg 4178 cdb.gen2(0x88,m); 4179 cdb.last().IEV1.Voffset = offset; 4180 cdb.last().IFL1 = FLconst; 4181 if (I64 && vreg >= 4) 4182 cdb.last().Irex |= REX; 4183 } 4184 fixres: 4185 fixresult(cdb,e,retregs1,pretregs); 4186 return; 4187 } 4188 } 4189 4190 // Get nbytes into CX 4191 regm_t retregs2 = 0; 4192 if (enumbytes.Eoper != OPconst) 4193 { 4194 retregs2 = mCX; 4195 codelem(cdb,enumbytes,&retregs2,false); 4196 } 4197 4198 // Get value into AX 4199 retregs3 = mAX; 4200 if (valueIsConst) 4201 { 4202 regwithvalue(cdb, mAX, value, null, I64?64:0); 4203 freenode(evalue); 4204 } 4205 else 4206 { 4207 scodelem(cdb,evalue,&retregs3,retregs2,false); 4208 4209 getregs(cdb,mAX); 4210 if (I16) 4211 { 4212 cdb.gen2(0x8A,modregrm(3,AH,AL)); // MOV AH,AL 4213 } 4214 else if (I32) 4215 { 4216 genregs(cdb,MOVZXb,AX,AX); // MOVZX EAX,AL 4217 cdb.genc2(0x69,modregrm(3,AX,AX),0x01010101); // IMUL EAX,EAX,0x01010101 4218 } 4219 else 4220 { 4221 genregs(cdb,MOVZXb,AX,AX); // MOVZX EAX,AL 4222 regm_t regm = allregs & ~(mAX | retregs2); 4223 reg_t r; 4224 regwithvalue(cdb,regm,cast(targ_size_t)0x01010101_01010101,&r,64); // MOV 
reg,0x01010101_01010101 4225 cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r)); // IMUL RAX,reg 4226 } 4227 } 4228 freenode(e2); 4229 4230 // Get s into ES:DI 4231 retregs1 = mDI; 4232 ty1 = e.EV.E1.Ety; 4233 if (!tyreg(ty1)) 4234 retregs1 |= mES; 4235 scodelem(cdb,e.EV.E1,&retregs1,retregs2 | retregs3,false); 4236 reg = DI; //findreg(retregs1); 4237 4238 // Make sure ES contains proper segment value 4239 cdb.append(cod2_setES(ty1)); 4240 4241 if (*pretregs) // if need return value 4242 { 4243 getregs(cdb,mBX); 4244 genmovreg(cdb,BX,DI); // MOV EBX,EDI 4245 } 4246 4247 4248 if (enumbytes.Eoper == OPconst) 4249 { 4250 getregs(cdb,mDI); 4251 if (const numwords = numbytes / REGSIZE) 4252 { 4253 regwithvalue(cdb,mCX,numwords,null, I64 ? 64 : 0); 4254 getregs(cdb,mCX); 4255 cdb.gen1(0xF3); // REP 4256 cdb.gen1(STOS); // STOSW/D/Q 4257 if (I64) 4258 code_orrex(cdb.last(), REX_W); 4259 regimmed_set(CX, 0); // CX is now 0 4260 } 4261 4262 auto remainder = numbytes & (REGSIZE - 1); 4263 if (I64 && remainder >= 4) 4264 { 4265 cdb.gen1(STOS); // STOSD 4266 remainder -= 4; 4267 } 4268 for (; remainder; --remainder) 4269 cdb.gen1(STOSB); // STOSB 4270 fixresult(cdb,e,mES|mBX,pretregs); 4271 return; 4272 } 4273 4274 getregs(cdb,mDI | mCX); 4275 if (I16) 4276 { 4277 if (config.flags4 & CFG4speed) // if speed optimization 4278 { 4279 cdb.gen2(0xD1,modregrm(3,5,CX)); // SHR CX,1 4280 cdb.gen1(0xF3); // REP 4281 cdb.gen1(STOS); // STOSW 4282 cdb.gen2(0x11,modregrm(3,CX,CX)); // ADC CX,CX 4283 } 4284 cdb.gen1(0xF3); // REP 4285 cdb.gen1(STOSB); // STOSB 4286 regimmed_set(CX, 0); // CX is now 0 4287 fixresult(cdb,e,mES|mBX,pretregs); 4288 return; 4289 } 4290 4291 /* MOV sreg,ECX 4292 SHR ECX,n 4293 REP 4294 STOSD/Q 4295 4296 ADC ECX,ECX 4297 REP 4298 STOSD 4299 4300 MOV ECX,sreg 4301 AND ECX,3 4302 REP 4303 STOSB 4304 */ 4305 regm_t regs = allregs & (*pretregs ? 
~(mAX|mBX|mCX|mDI) : ~(mAX|mCX|mDI)); 4306 reg_t sreg; 4307 allocreg(cdb,®s,&sreg,TYint); 4308 genregs(cdb,0x89,CX,sreg); // MOV sreg,ECX (32 bits only) 4309 4310 const n = I64 ? 3 : 2; 4311 cdb.genc2(0xC1, grex | modregrm(3,5,CX), n); // SHR ECX,n 4312 4313 cdb.gen1(0xF3); // REP 4314 cdb.gen1(STOS); // STOSD/Q 4315 if (I64) 4316 code_orrex(cdb.last(), REX_W); 4317 4318 if (I64) 4319 { 4320 cdb.gen2(0x11,modregrm(3,CX,CX)); // ADC ECX,ECX 4321 cdb.gen1(0xF3); // REP 4322 cdb.gen1(STOS); // STOSD 4323 } 4324 4325 genregs(cdb,0x89,sreg,CX); // MOV ECX,sreg (32 bits only) 4326 cdb.genc2(0x81, modregrm(3,4,CX), 3); // AND ECX,3 4327 cdb.gen1(0xF3); // REP 4328 cdb.gen1(STOSB); // STOSB 4329 4330 regimmed_set(CX, 0); // CX is now 0 4331 fixresult(cdb,e,mES|mBX,pretregs); 4332 } 4333 4334 4335 /********************** 4336 * Do structure assignments. 4337 * This should be fixed so that (s1 = s2) is rewritten to (&s1 = &s2). 4338 * Mebbe call cdstreq() for double assignments??? 4339 */ 4340 4341 void cdstreq(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 4342 { 4343 char need_DS = false; 4344 elem *e1 = e.EV.E1; 4345 elem *e2 = e.EV.E2; 4346 int segreg; 4347 uint numbytes = cast(uint)type_size(e.ET); // # of bytes in structure/union 4348 ubyte rex = I64 ? REX_W : 0; 4349 4350 //printf("cdstreq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 4351 4352 // First, load pointer to rvalue into SI 4353 regm_t srcregs = mSI; // source is DS:SI 4354 docommas(cdb,&e2); 4355 if (e2.Eoper == OPind) // if (.. 
= *p) 4356 { elem *e21 = e2.EV.E1; 4357 4358 segreg = SEG_DS; 4359 switch (tybasic(e21.Ety)) 4360 { 4361 case TYsptr: 4362 if (config.wflags & WFssneds) // if sptr can't use DS segment 4363 segreg = SEG_SS; 4364 break; 4365 case TYcptr: 4366 if (!(config.exe & EX_flat)) 4367 segreg = SEG_CS; 4368 break; 4369 case TYfptr: 4370 case TYvptr: 4371 case TYhptr: 4372 srcregs |= mCX; // get segment also 4373 need_DS = true; 4374 break; 4375 4376 default: 4377 break; 4378 } 4379 codelem(cdb,e21,&srcregs,false); 4380 freenode(e2); 4381 if (segreg != SEG_DS) // if not DS 4382 { 4383 getregs(cdb,mCX); 4384 cdb.gen2(0x8C,modregrm(3,segreg,CX)); // MOV CX,segreg 4385 need_DS = true; 4386 } 4387 } 4388 else if (e2.Eoper == OPvar) 4389 { 4390 if (e2.EV.Vsym.ty() & mTYfar) // if e2 is in a far segment 4391 { srcregs |= mCX; // get segment also 4392 need_DS = true; 4393 cdrelconst(cdb,e2,&srcregs); 4394 } 4395 else 4396 { 4397 segreg = segfl[el_fl(e2)]; 4398 if ((config.wflags & WFssneds) && segreg == SEG_SS || // if source is on stack 4399 segreg == SEG_CS) // if source is in CS 4400 { 4401 need_DS = true; // we need to reload DS 4402 // Load CX with segment 4403 srcregs |= mCX; 4404 getregs(cdb,mCX); 4405 cdb.gen2(0x8C, // MOV CX,[SS|CS] 4406 modregrm(3,segreg,CX)); 4407 } 4408 cdrelconst(cdb,e2,&srcregs); 4409 } 4410 freenode(e2); 4411 } 4412 else 4413 { 4414 if (!(config.exe & EX_flat)) 4415 { need_DS = true; 4416 srcregs |= mCX; 4417 } 4418 codelem(cdb,e2,&srcregs,false); 4419 } 4420 4421 // now get pointer to lvalue (destination) in ES:DI 4422 regm_t dstregs = (config.exe & EX_flat) ? mDI : mES|mDI; 4423 if (e1.Eoper == OPind) // if (*p = ..) 
4424 { 4425 if (tyreg(e1.EV.E1.Ety)) 4426 dstregs = mDI; 4427 cdb.append(cod2_setES(e1.EV.E1.Ety)); 4428 scodelem(cdb,e1.EV.E1,&dstregs,srcregs,false); 4429 } 4430 else 4431 cdrelconst(cdb,e1,&dstregs); 4432 freenode(e1); 4433 4434 getregs(cdb,(srcregs | dstregs) & (mLSW | mDI)); 4435 if (need_DS) 4436 { assert(!(config.exe & EX_flat)); 4437 cdb.gen1(0x1E); // PUSH DS 4438 cdb.gen2(0x8E,modregrm(3,SEG_DS,CX)); // MOV DS,CX 4439 } 4440 if (numbytes <= REGSIZE * (6 + (REGSIZE == 4))) 4441 { 4442 while (numbytes >= REGSIZE) 4443 { 4444 cdb.gen1(0xA5); // MOVSW 4445 code_orrex(cdb.last(), rex); 4446 numbytes -= REGSIZE; 4447 } 4448 //if (numbytes) 4449 // printf("cdstreq numbytes %d\n",numbytes); 4450 if (I64 && numbytes >= 4) 4451 { 4452 cdb.gen1(0xA5); // MOVSD 4453 numbytes -= 4; 4454 } 4455 while (numbytes--) 4456 cdb.gen1(0xA4); // MOVSB 4457 } 4458 else 4459 { 4460 static if (1) 4461 { 4462 uint remainder = numbytes & (REGSIZE - 1); 4463 numbytes /= REGSIZE; // number of words 4464 getregs_imm(cdb,mCX); 4465 movregconst(cdb,CX,numbytes,0); // # of bytes/words 4466 cdb.gen1(0xF3); // REP 4467 if (REGSIZE == 8) 4468 cdb.gen1(REX | REX_W); 4469 cdb.gen1(0xA5); // REP MOVSD 4470 regimmed_set(CX,0); // note that CX == 0 4471 if (I64 && remainder >= 4) 4472 { 4473 cdb.gen1(0xA5); // MOVSD 4474 remainder -= 4; 4475 } 4476 for (; remainder; remainder--) 4477 { 4478 cdb.gen1(0xA4); // MOVSB 4479 } 4480 } 4481 else 4482 { 4483 uint movs; 4484 if (numbytes & (REGSIZE - 1)) // if odd 4485 movs = 0xA4; // MOVSB 4486 else 4487 { 4488 movs = 0xA5; // MOVSW 4489 numbytes /= REGSIZE; // # of words 4490 } 4491 getregs_imm(cdb,mCX); 4492 movregconst(cdb,CX,numbytes,0); // # of bytes/words 4493 cdb.gen1(0xF3); // REP 4494 cdb.gen1(movs); 4495 regimmed_set(CX,0); // note that CX == 0 4496 } 4497 } 4498 if (need_DS) 4499 cdb.gen1(0x1F); // POP DS 4500 assert(!(*pretregs & mPSW)); 4501 if (*pretregs) 4502 { // ES:DI points past what we want 4503 4504 cdb.genc2(0x81,(rex << 16) | 
modregrm(3,5,DI), type_size(e.ET)); // SUB DI,numbytes 4505 regm_t retregs = mDI; 4506 if (*pretregs & mMSW && !(config.exe & EX_flat)) 4507 retregs |= mES; 4508 fixresult(cdb,e,retregs,pretregs); 4509 } 4510 } 4511 4512 4513 /********************** 4514 * Get the address of. 4515 * Is also called by cdstreq() to set up pointer to a structure. 4516 */ 4517 4518 void cdrelconst(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 4519 { 4520 //printf("cdrelconst(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 4521 4522 /* The following should not happen, but cgelem.c is a little stupid. 4523 * Assertion can be tripped by func("string" == 0); and similar 4524 * things. Need to add goals to optelem() to fix this completely. 4525 */ 4526 //assert((*pretregs & mPSW) == 0); 4527 if (*pretregs & mPSW) 4528 { 4529 *pretregs &= ~mPSW; 4530 gentstreg(cdb,SP); // SP is never 0 4531 if (I64) 4532 code_orrex(cdb.last(), REX_W); 4533 } 4534 if (!*pretregs) 4535 return; 4536 4537 assert(e); 4538 tym_t tym = tybasic(e.Ety); 4539 switch (tym) 4540 { 4541 case TYstruct: 4542 case TYarray: 4543 case TYldouble: 4544 case TYildouble: 4545 case TYcldouble: 4546 tym = TYnptr; // don't confuse allocreg() 4547 if (*pretregs & (mES | mCX) || e.Ety & mTYfar) 4548 { 4549 tym = TYfptr; 4550 } 4551 break; 4552 4553 case TYifunc: 4554 tym = TYfptr; 4555 break; 4556 4557 default: 4558 if (tyfunc(tym)) 4559 tym = 4560 tyfarfunc(tym) ? TYfptr : 4561 TYnptr; 4562 break; 4563 } 4564 //assert(tym & typtr); // don't fail on (int)&a 4565 4566 SC sclass; 4567 reg_t mreg, // segment of the address (TYfptrs only) 4568 lreg; // offset of the address 4569 4570 allocreg(cdb,pretregs,&lreg,tym); 4571 if (_tysize[tym] > REGSIZE) // fptr could've been cast to long 4572 { 4573 if (*pretregs & mES) 4574 { 4575 /* Do not allocate CX or SI here, as cdstreq() needs 4576 * them preserved. 
cdstreq() should use scodelem() 4577 */ 4578 mreg = allocScratchReg(cdb, (mAX|mBX|mDX|mDI) & ~mask(lreg)); 4579 } 4580 else 4581 { 4582 mreg = lreg; 4583 lreg = findreglsw(*pretregs); 4584 } 4585 4586 /* if (get segment of function that isn't necessarily in the 4587 * current segment (i.e. CS doesn't have the right value in it) 4588 */ 4589 Symbol *s = e.EV.Vsym; 4590 if (s.Sfl == FLdatseg) 4591 { assert(0); 4592 } 4593 sclass = cast(SC) s.Sclass; 4594 const ety = tybasic(s.ty()); 4595 if ((tyfarfunc(ety) || ety == TYifunc) && 4596 (sclass == SCextern || ClassInline(sclass) || config.wflags & WFthunk) 4597 || s.Sfl == FLfardata 4598 || (s.ty() & mTYcs && s.Sseg != cseg && (LARGECODE || s.Sclass == SCcomdat)) 4599 ) 4600 { // MOV mreg,seg of symbol 4601 cdb.gencs(0xB8 + mreg,0,FLextern,s); 4602 cdb.last().Iflags = CFseg; 4603 } 4604 else 4605 { 4606 const fl = (s.ty() & mTYcs) ? FLcsdata : s.Sfl; 4607 cdb.gen2(0x8C, // MOV mreg,SEG REGISTER 4608 modregrm(3,segfl[fl],mreg)); 4609 } 4610 if (*pretregs & mES) 4611 cdb.gen2(0x8E,modregrm(3,0,mreg)); // MOV ES,mreg 4612 } 4613 getoffset(cdb,e,lreg); 4614 } 4615 4616 /********************************* 4617 * Load the offset portion of the address represented by e into 4618 * reg. 
4619 */ 4620 4621 void getoffset(ref CodeBuilder cdb,elem *e,reg_t reg) 4622 { 4623 //printf("getoffset(e = %p, reg = %d)\n", e, reg); 4624 code cs = void; 4625 cs.Iflags = 0; 4626 ubyte rex = 0; 4627 cs.Irex = rex; 4628 assert(e.Eoper == OPvar || e.Eoper == OPrelconst); 4629 auto fl = el_fl(e); 4630 switch (fl) 4631 { 4632 case FLdatseg: 4633 cs.IEV2.Vpointer = e.EV.Vpointer; 4634 goto L3; 4635 4636 case FLfardata: 4637 goto L4; 4638 4639 case FLtlsdata: 4640 if (config.exe & EX_posix) 4641 { 4642 Lposix: 4643 if (config.flags3 & CFG3pic) 4644 { 4645 if (I64) 4646 { 4647 /* Generate: 4648 * LEA DI,s@TLSGD[RIP] 4649 */ 4650 //assert(reg == DI); 4651 code css = void; 4652 css.Irex = REX | REX_W; 4653 css.Iop = LEA; 4654 css.Irm = modregrm(0,reg,5); 4655 if (reg & 8) 4656 css.Irex |= REX_R; 4657 css.Iflags = CFopsize; 4658 css.IFL1 = cast(ubyte)fl; 4659 css.IEV1.Vsym = e.EV.Vsym; 4660 css.IEV1.Voffset = e.EV.Voffset; 4661 cdb.gen(&css); 4662 } 4663 else 4664 { 4665 /* Generate: 4666 * LEA EAX,s@TLSGD[1*EBX+0] 4667 */ 4668 assert(reg == AX); 4669 load_localgot(cdb); 4670 code css = void; 4671 css.Iflags = 0; 4672 css.Iop = LEA; // LEA 4673 css.Irex = 0; 4674 css.Irm = modregrm(0,AX,4); 4675 css.Isib = modregrm(0,BX,5); 4676 css.IFL1 = cast(ubyte)fl; 4677 css.IEV1.Vsym = e.EV.Vsym; 4678 css.IEV1.Voffset = e.EV.Voffset; 4679 cdb.gen(&css); 4680 } 4681 return; 4682 } 4683 /* Generate: 4684 * MOV reg,GS:[00000000] 4685 * ADD reg, offset s@TLS_LE 4686 * for locals, and for globals: 4687 * MOV reg,GS:[00000000] 4688 * ADD reg, s@TLS_IE 4689 * note different fixup 4690 */ 4691 int stack = 0; 4692 if (reg == STACK) 4693 { regm_t retregs = ALLREGS; 4694 4695 reg_t regx; 4696 allocreg(cdb,&retregs,®x,TYoffset); 4697 reg = findreg(retregs); 4698 stack = 1; 4699 } 4700 4701 code css = void; 4702 css.Irex = rex; 4703 css.Iop = 0x8B; 4704 css.Irm = modregrm(0, 0, BPRM); 4705 code_newreg(&css, reg); 4706 css.Iflags = CFgs; 4707 css.IFL1 = FLconst; 4708 css.IEV1.Vuns = 0; 4709 
cdb.gen(&css); // MOV reg,GS:[00000000] 4710 4711 if (e.EV.Vsym.Sclass == SCstatic || e.EV.Vsym.Sclass == SClocstat) 4712 { // ADD reg, offset s 4713 cs.Irex = rex; 4714 cs.Iop = 0x81; 4715 cs.Irm = modregrm(3,0,reg & 7); 4716 if (reg & 8) 4717 cs.Irex |= REX_B; 4718 cs.Iflags = CFoff; 4719 cs.IFL2 = cast(ubyte)fl; 4720 cs.IEV2.Vsym = e.EV.Vsym; 4721 cs.IEV2.Voffset = e.EV.Voffset; 4722 } 4723 else 4724 { // ADD reg, s 4725 cs.Irex = rex; 4726 cs.Iop = 0x03; 4727 cs.Irm = modregrm(0,0,BPRM); 4728 code_newreg(&cs, reg); 4729 cs.Iflags = CFoff; 4730 cs.IFL1 = cast(ubyte)fl; 4731 cs.IEV1.Vsym = e.EV.Vsym; 4732 cs.IEV1.Voffset = e.EV.Voffset; 4733 } 4734 cdb.gen(&cs); // ADD reg, xxxx 4735 4736 if (stack) 4737 { 4738 cdb.gen1(0x50 + (reg & 7)); // PUSH reg 4739 if (reg & 8) 4740 code_orrex(cdb.last(), REX_B); 4741 cdb.genadjesp(REGSIZE); 4742 stackchanged = 1; 4743 } 4744 break; 4745 } 4746 else if (config.exe & EX_windos) 4747 { 4748 if (I64) 4749 { 4750 Lwin64: 4751 assert(reg != STACK); 4752 cs.IEV2.Vsym = e.EV.Vsym; 4753 cs.IEV2.Voffset = e.EV.Voffset; 4754 cs.Iop = 0xB8 + (reg & 7); // MOV Ereg,offset s 4755 if (reg & 8) 4756 cs.Irex |= REX_B; 4757 cs.Iflags = CFoff; // want offset only 4758 cs.IFL2 = cast(ubyte)fl; 4759 cdb.gen(&cs); 4760 break; 4761 } 4762 goto L4; 4763 } 4764 else 4765 { 4766 goto L4; 4767 } 4768 4769 case FLfunc: 4770 fl = FLextern; /* don't want PC relative addresses */ 4771 goto L4; 4772 4773 case FLextern: 4774 if (config.exe & EX_posix && e.EV.Vsym.ty() & mTYthread) 4775 goto Lposix; 4776 if (config.exe & EX_WIN64 && e.EV.Vsym.ty() & mTYthread) 4777 goto Lwin64; 4778 goto L4; 4779 4780 case FLdata: 4781 case FLudata: 4782 case FLgot: 4783 case FLgotoff: 4784 case FLcsdata: 4785 L4: 4786 cs.IEV2.Vsym = e.EV.Vsym; 4787 cs.IEV2.Voffset = e.EV.Voffset; 4788 L3: 4789 if (reg == STACK) 4790 { stackchanged = 1; 4791 cs.Iop = 0x68; /* PUSH immed16 */ 4792 cdb.genadjesp(REGSIZE); 4793 } 4794 else 4795 { cs.Iop = 0xB8 + (reg & 7); // MOV reg,immed16 
4796 if (reg & 8) 4797 cs.Irex |= REX_B; 4798 if (I64) 4799 { cs.Irex |= REX_W; 4800 if (config.flags3 & CFG3pic || config.exe == EX_WIN64) 4801 { // LEA reg,immed32[RIP] 4802 cs.Iop = LEA; 4803 cs.Irm = modregrm(0,reg & 7,5); 4804 if (reg & 8) 4805 cs.Irex = (cs.Irex & ~REX_B) | REX_R; 4806 cs.IFL1 = cast(ubyte)fl; 4807 cs.IEV1.Vsym = cs.IEV2.Vsym; 4808 cs.IEV1.Voffset = cs.IEV2.Voffset; 4809 } 4810 } 4811 } 4812 cs.Iflags = CFoff; /* want offset only */ 4813 cs.IFL2 = cast(ubyte)fl; 4814 cdb.gen(&cs); 4815 break; 4816 4817 case FLreg: 4818 /* Allow this since the tree optimizer puts & in front of */ 4819 /* register doubles. */ 4820 goto L2; 4821 case FLauto: 4822 case FLfast: 4823 case FLbprel: 4824 case FLfltreg: 4825 reflocal = true; 4826 goto L2; 4827 case FLpara: 4828 refparam = true; 4829 L2: 4830 if (reg == STACK) 4831 { regm_t retregs = ALLREGS; 4832 4833 reg_t regx; 4834 allocreg(cdb,&retregs,®x,TYoffset); 4835 reg = findreg(retregs); 4836 loadea(cdb,e,&cs,LEA,reg,0,0,0); // LEA reg,EA 4837 if (I64) 4838 code_orrex(cdb.last(), REX_W); 4839 cdb.gen1(0x50 + (reg & 7)); // PUSH reg 4840 if (reg & 8) 4841 code_orrex(cdb.last(), REX_B); 4842 cdb.genadjesp(REGSIZE); 4843 stackchanged = 1; 4844 } 4845 else 4846 { 4847 loadea(cdb,e,&cs,LEA,reg,0,0,0); // LEA reg,EA 4848 if (I64) 4849 code_orrex(cdb.last(), REX_W); 4850 } 4851 break; 4852 4853 default: 4854 debug 4855 { 4856 elem_print(e); 4857 WRFL(fl); 4858 } 4859 assert(0); 4860 } 4861 } 4862 4863 4864 /****************** 4865 * OPneg, OPsqrt, OPsin, OPcos, OPrint 4866 */ 4867 4868 void cdneg(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 4869 { 4870 //printf("cdneg()\n"); 4871 //elem_print(e); 4872 if (*pretregs == 0) 4873 { 4874 codelem(cdb,e.EV.E1,pretregs,false); 4875 return; 4876 } 4877 const tyml = tybasic(e.EV.E1.Ety); 4878 const sz = _tysize[tyml]; 4879 if (tyfloating(tyml)) 4880 { 4881 if (tycomplex(tyml)) 4882 { 4883 neg_complex87(cdb, e, pretregs); 4884 return; 4885 } 4886 if (tyxmmreg(tyml) && 
e.Eoper == OPneg && *pretregs & XMMREGS) 4887 { 4888 xmmneg(cdb,e,pretregs); 4889 return; 4890 } 4891 if (config.inline8087 && 4892 ((*pretregs & (ALLREGS | mBP)) == 0 || e.Eoper == OPsqrt || I64)) 4893 { 4894 neg87(cdb,e,pretregs); 4895 return; 4896 } 4897 regm_t retregs = (I16 && sz == 8) ? DOUBLEREGS_16 : ALLREGS; 4898 codelem(cdb,e.EV.E1,&retregs,false); 4899 getregs(cdb,retregs); 4900 if (I32) 4901 { 4902 const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs); 4903 cdb.genc2(0x81,modregrm(3,6,reg),0x80000000); // XOR EDX,sign bit 4904 } 4905 else 4906 { 4907 const reg = (sz == 8) ? AX : findregmsw(retregs); 4908 cdb.genc2(0x81,modregrm(3,6,reg),0x8000); // XOR AX,0x8000 4909 } 4910 fixresult(cdb,e,retregs,pretregs); 4911 return; 4912 } 4913 4914 const uint isbyte = sz == 1; 4915 const possregs = (isbyte) ? BYTEREGS : allregs; 4916 regm_t retregs = *pretregs & possregs; 4917 if (retregs == 0) 4918 retregs = possregs; 4919 codelem(cdb,e.EV.E1,&retregs,false); 4920 getregs(cdb,retregs); // retregs will be destroyed 4921 if (sz <= REGSIZE) 4922 { 4923 const reg = findreg(retregs); 4924 uint rex = (I64 && sz == 8) ? 
REX_W : 0;
        if (I64 && sz == 1 && reg >= 4)
            rex |= REX;
        cdb.gen2(0xF7 ^ isbyte,(rex << 16) | modregrmx(3,3,reg));   // NEG reg
        if (!I16 && _tysize[tyml] == SHORTSIZE && *pretregs & mPSW)
            cdb.last().Iflags |= CFopsize | CFpsw;
        *pretregs &= mBP | ALLREGS;             // flags already set
    }
    else if (sz == 2 * REGSIZE)
    {
        const msreg = findregmsw(retregs);
        cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
        const lsreg = findreglsw(retregs);
        cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg
        code_orflag(cdb.last(), CFpsw);           // need flag result of previous NEG
        cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}


/******************
 * Absolute value operator
 *
 * Generates code that evaluates e.EV.E1 and leaves its absolute value in
 * a register (pair) selected from *pretregs.
 *
 * Params:
 *      cdb = code sink the instructions are appended to
 *      e = the absolute-value elem; its operand is e.EV.E1
 *      pretregs = mask of acceptable result registers/flags. If 0, the
 *                 operand is evaluated only for its side effects. mPSW is
 *                 cleared from it on the single-register integer path, where
 *                 the final SUB already carries the flags.
 */


void cdabs(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdabs(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    if (*pretregs == 0)
    {
        // Result is unused: just evaluate the operand
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }
    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    const rex = (I64 && sz == 8) ? REX_W : 0;
    if (tyfloating(tyml))
    {
        if (tyxmmreg(tyml) && *pretregs & XMMREGS)
        {
            xmmabs(cdb,e,pretregs);
            return;
        }
        if (config.inline8087 && ((*pretregs & (ALLREGS | mBP)) == 0 || I64))
        {
            neg87(cdb,e,pretregs);
            return;
        }

        // Floating value held in integer registers: mask off the sign bit
        regm_t retregs = (!I32 && sz == 8) ? DOUBLEREGS_16 : ALLREGS;
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,retregs);
        if (I32)
        {
            const reg = (sz == 8) ? findregmsw(retregs) : findreg(retregs);
            cdb.genc2(0x81,modregrm(3,4,reg),0x7FFFFFFF);     // AND EDX,~sign bit
        }
        else
        {
            const reg = (sz == 8) ? AX : findregmsw(retregs);
            cdb.genc2(0x81,modregrm(3,4,reg),0x7FFF);         // AND AX,0x7FFF
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }

    const uint isbyte = sz == 1;
    assert(isbyte == 0);                        // byte operands are not handled here
    regm_t possregs = (sz <= REGSIZE) ? cast(regm_t) mAX : allregs;
    if (!I16 && sz == REGSIZE)
        possregs = allregs;
    regm_t retregs = *pretregs & possregs;
    if (retregs == 0)
        retregs = possregs;
    codelem(cdb,e.EV.E1,&retregs,false);
    getregs(cdb,retregs);                       // retregs will be destroyed
    if (sz <= REGSIZE)
    {
        /*      CWD
                XOR     AX,DX
                SUB     AX,DX
           or:
                MOV     r,reg
                SAR     r,63
                XOR     reg,r
                SUB     reg,r
         */
        reg_t reg;
        reg_t r;

        if (!I16 && sz == REGSIZE)
        {
            reg = findreg(retregs);
            r = allocScratchReg(cdb, allregs & ~retregs);
            getregs(cdb,retregs);
            genmovreg(cdb,r,reg);                              // MOV r,reg
            cdb.genc2(0xC1,modregrmx(3,7,r),REGSIZE * 8 - 1);  // SAR r,31/63
            code_orrex(cdb.last(), rex);
        }
        else
        {
            reg = AX;
            r = DX;
            getregs(cdb,mDX);
            if (!I16 && sz == SHORTSIZE)
                cdb.gen1(0x98);                         // CWDE
            cdb.gen1(0x99);                             // CWD
            code_orrex(cdb.last(), rex);
        }
        cdb.gen2(0x33 ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // XOR reg,r
        cdb.gen2(0x2B ^ isbyte,(rex << 16) | modregxrmx(3,reg,r)); // SUB reg,r
        if (!I16 && sz == SHORTSIZE && *pretregs & mPSW)
            cdb.last().Iflags |= CFopsize | CFpsw;
        if (*pretregs & mPSW)
            cdb.last().Iflags |= CFpsw;
        *pretregs &= ~mPSW;                     // flags already set
    }
    else if (sz == 2 * REGSIZE)
    {
        /*      or      DX,DX
                jns     L2
                neg     DX
                neg     AX
                sbb     DX,0
            L2:
         */

        code *cnop = gennop(null);
        const msreg = findregmsw(retregs);
        const lsreg = findreglsw(retregs);
        genregs(cdb,0x09,msreg,msreg);            // OR msreg,msreg
        genjmp(cdb,JNS,FLcode,cast(block *)cnop);
        cdb.gen2(0xF7,modregrm(3,3,msreg));       // NEG msreg
        cdb.gen2(0xF7,modregrm(3,3,lsreg));       // NEG lsreg
        // The SBB below consumes the carry produced by NEG lsreg, so mark the
        // NEG as needing its flag result, exactly as cdneg() does for the
        // same three-instruction sequence.
        code_orflag(cdb.last(), CFpsw);
        cdb.genc2(0x81,modregrm(3,3,msreg),0);    // SBB msreg,0
        cdb.append(cnop);
    }
    else
        assert(0);
    fixresult(cdb,e,retregs,pretregs);
}

/**************************
 * Post increment and post decrement.
 *
 * Loads the old value of the lvalue e.EV.E1 as the result and then adds
 * (OPpostinc) or subtracts (OPpostdec) e.EV.E2 in place.
 *
 * Params:
 *      cdb = code sink the instructions are appended to
 *      e = the post-increment/decrement elem
 *      pretregs = mask of acceptable result registers/flags. If 0 the old
 *                 value is not needed and the work is handed to cdaddass().
 */

void cdpost(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpost(pretregs = %s)\n", regm_str(*pretregs));
    code cs = void;
    const op = e.Eoper;                      // OPxxxx
    if (*pretregs == 0)                      // if nothing to return
    {
        cdaddass(cdb,e,pretregs);
        return;
    }
    const tym_t tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    elem *e2 = e.EV.E2;
    const rex = (I64 && sz == 8) ? REX_W : 0;

    if (tyfloating(tyml))
    {
        if (config.fpxmmregs && tyxmmreg(tyml) &&
            !tycomplex(tyml) // SIMD code is not set up to deal with complex
           )
        {
            xmmpost(cdb,e,pretregs);
            return;
        }

        if (config.inline8087)
        {
            post87(cdb,e,pretregs);
            return;
        }
        if (config.exe & EX_windos)
        {
            // Software floating point: save the old value on the stack,
            // call the runtime add/sub routine, store the new value,
            // then pop the old value as the result.
            assert(sz <= 8);
            getlvalue(cdb,&cs,e.EV.E1,DOUBLEREGS);
            freenode(e.EV.E1);
            regm_t idxregs = idxregm(&cs);  // mask of index regs used
            cs.Iop = 0x8B;                  /* MOV DOUBLEREGS,EA            */
            fltregs(cdb,&cs,tyml);
            stackchanged = 1;
            int stackpushsave = stackpush;
            regm_t retregs;
            if (sz == 8)
            {
                if (I32)
                {
                    cdb.gen1(0x50 + DX);             // PUSH DOUBLEREGS
                    cdb.gen1(0x50 + AX);
                    stackpush += DOUBLESIZE;
                    retregs = DOUBLEREGS2_32;
                }
                else
                {
                    cdb.gen1(0x50 + AX);
                    cdb.gen1(0x50 + BX);
                    cdb.gen1(0x50 + CX);
                    cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                    stackpush += DOUBLESIZE + DOUBLESIZE;

                    cdb.gen1(0x50 + AX);
                    cdb.gen1(0x50 + BX);
                    cdb.gen1(0x50 + CX);
                    cdb.gen1(0x50 + DX);             /* PUSH DOUBLEREGS      */
                    retregs = DOUBLEREGS_16;
                }
            }
            else
            {
                stackpush += FLOATSIZE;     /* so we know something is on   */
                if (!I32)
                    cdb.gen1(0x50 + DX);
                cdb.gen1(0x50 + AX);
                retregs = FLOATREGS2;
            }
            cdb.genadjesp(stackpush - stackpushsave);

            cgstate.stackclean++;
            scodelem(cdb,e2,&retregs,idxregs,false);
            cgstate.stackclean--;

            if (tyml == TYdouble || tyml == TYdouble_alias)
            {
                retregs = DOUBLEREGS;
                callclib(cdb,e,(op == OPpostinc) ? CLIB.dadd : CLIB.dsub,
                        &retregs,idxregs);
            }
            else /* tyml == TYfloat */
            {
                retregs = FLOATREGS;
                callclib(cdb,e,(op == OPpostinc) ? CLIB.fadd : CLIB.fsub,
                        &retregs,idxregs);
            }
            cs.Iop = 0x89;                  /* MOV EA,DOUBLEREGS            */
            fltregs(cdb,&cs,tyml);
            stackpushsave = stackpush;
            if (tyml == TYdouble || tyml == TYdouble_alias)
            {   if (*pretregs == mSTACK)
                    retregs = mSTACK;       /* leave result on stack        */
                else
                {
                    if (I32)
                    {
                        cdb.gen1(0x58 + AX);
                        cdb.gen1(0x58 + DX);
                    }
                    else
                    {
                        cdb.gen1(0x58 + DX);
                        cdb.gen1(0x58 + CX);
                        cdb.gen1(0x58 + BX);
                        cdb.gen1(0x58 + AX);
                    }
                    stackpush -= DOUBLESIZE;
                    retregs = DOUBLEREGS;
                }
            }
            else
            {
                cdb.gen1(0x58 + AX);
                if (!I32)
                    cdb.gen1(0x58 + DX);
                stackpush -= FLOATSIZE;
                retregs = FLOATREGS;
            }
            cdb.genadjesp(stackpush - stackpushsave);
            fixresult(cdb,e,retregs,pretregs);
            return;
        }
    }
    if (tyxmmreg(tyml))
    {
        xmmpost(cdb,e,pretregs);
        return;
    }

    assert(e2.Eoper == OPconst);
    uint isbyte = (sz == 1);
    regm_t possregs = isbyte ? BYTEREGS : allregs;
    getlvalue(cdb,&cs,e.EV.E1,0);
    freenode(e.EV.E1);
    regm_t idxregs = idxregm(&cs);       // mask of index regs used
    if (sz <= REGSIZE && *pretregs == mPSW && (cs.Irm & 0xC0) == 0xC0 &&
        (!I16 || (idxregs & (mBX | mSI | mDI | mBP))))
    {
        // Only the flags of the old value are wanted and the lvalue is a
        // register. Generate:
        //      TEST    reg,reg
        //      LEA     reg,n[reg]      // don't affect flags
        reg_t reg = cs.Irm & 7;
        if (cs.Irex & REX_B)
            reg |= 8;
        cs.Iop = 0x85 ^ isbyte;
        code_newreg(&cs, reg);
        cs.Iflags |= CFpsw;
        cdb.gen(&cs);             // TEST reg,reg

        // If lvalue is a register variable, we must mark it as modified
        modEA(cdb,&cs);

        auto n = e2.EV.Vint;
        if (op == OPpostdec)
            n = -n;
        int rm = reg;
        if (I16)
        {
            static immutable byte[8] regtorm = [ -1,-1,-1, 7,-1, 6, 4, 5 ]; // copied from cod1.c
            rm = regtorm[reg];
        }
        cdb.genc1(LEA,(rex << 16) | buildModregrm(2,reg,rm),FLconst,n); // LEA reg,n[reg]
        return;
    }
    else if (sz <= REGSIZE || tyfv(tyml))
    {
        code cs2 = void;

        cs.Iop = 0x8B ^ isbyte;
        regm_t retregs = possregs & ~idxregs & *pretregs;
        if (!tyfv(tyml))
        {
            if (retregs == 0)
                retregs = possregs & ~idxregs;
        }
        else /* tyfv(tyml) */
        {
            if ((retregs &= mLSW) == 0)
                retregs = mLSW & ~idxregs;
            /* Can't use LES if the EA uses ES as a seg override    */
            if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
            {   cs.Iop = 0xC4;                      /* LES          */
                getregs(cdb,mES);           // allocate ES
            }
        }
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        code_newreg(&cs, reg);
        if (sz == 1 && I64 && reg >= 4)
            cs.Irex |= REX;
        cdb.gen(&cs);                     // MOV reg,EA
        cs2 = cs;                         // remember the load so it can be turned into a store

        /* If lvalue is a register variable, we must mark it as modified */
        modEA(cdb,&cs);

        cs.Iop = 0x81 ^ isbyte;
        cs.Irm &= ~cast(int)modregrm(0,7,0);             // reg field = 0
        cs.Irex &= ~REX_R;
        if (op == OPpostdec)
            cs.Irm |= modregrm(0,5,0);  /* SUB                  */
        cs.IFL2 = FLconst;
        targ_int n = e2.EV.Vint;
        cs.IEV2.Vint = n;
        if (n == 1)                     /* can use INC or DEC   */
        {
            cs.Iop |= 0xFE;             /* xFE is dec byte, xFF is word */
            if (op == OPpostdec)
                NEWREG(cs.Irm,1);       // DEC EA
            else
                NEWREG(cs.Irm,0);       // INC EA
        }
        else if (n == -1)               // can use INC or DEC
        {
            cs.Iop |= 0xFE;             // xFE is dec byte, xFF is word
            if (op == OPpostinc)
                NEWREG(cs.Irm,1);       // DEC EA
            else
                NEWREG(cs.Irm,0);       // INC EA
        }

        // For scheduling purposes, we wish to replace:
        //      MOV     reg,EA
        //      OP      EA
        // with:
        //      MOV     reg,EA
        //      OP      reg
        //      MOV     EA,reg
        //      ~OP     reg
        if (sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 &&
            config.target_cpu >= TARGET_Pentium &&
            config.flags4 & CFG4speed)
        {
            // Replace EA in cs with reg
            cs.Irm = (cs.Irm & ~cast(int)modregrm(3,0,7)) | modregrm(3,0,reg & 7);
            if (reg & 8)
            {   cs.Irex &= ~REX_R;
                cs.Irex |= REX_B;
            }
            else
                cs.Irex &= ~REX_B;
            if (I64 && sz == 1 && reg >= 4)
                cs.Irex |= REX;
            cdb.gen(&cs);                        // ADD/SUB reg,const

            // Reverse MOV direction
            cs2.Iop ^= 2;
            cdb.gen(&cs2);                       // MOV EA,reg

            // Toggle INC <-> DEC, ADD <-> SUB
            cs.Irm ^= (n == 1 || n == -1) ? modregrm(0,1,0) : modregrm(0,5,0);
            cdb.gen(&cs);

            if (*pretregs & mPSW)
            {   *pretregs &= ~mPSW;              // flags already set
                code_orflag(cdb.last(),CFpsw);
            }
        }
        else
            cdb.gen(&cs);                        // ADD/SUB EA,const

        freenode(e2);
        if (tyfv(tyml))
        {
            // Far pointer: also load the segment half of the old value
            reg_t preg;

            getlvalue_msw(&cs);
            if (*pretregs & mES)
            {
                preg = ES;
                /* ES is already loaded if CFes is 0            */
                cs.Iop = ((cs.Iflags & CFSEG) == CFes) ? 0x8E : NOP;
                NEWREG(cs.Irm,0);       /* MOV ES,EA+2          */
            }
            else
            {
                regm_t retregsx = *pretregs & mMSW;
                if (!retregsx)
                    retregsx = mMSW;
                allocreg(cdb,&retregsx,&preg,TYint);
                cs.Iop = 0x8B;
                if (I32)
                    cs.Iflags |= CFopsize;
                NEWREG(cs.Irm,preg);    /* MOV preg,EA+2        */
            }
            getregs(cdb,mask(preg));
            cdb.gen(&cs);
            retregs = mask(reg) | mask(preg);
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (tyml == TYhptr)
    {
        uint rvalue;
        reg_t lreg;
        reg_t rtmp;
        regm_t mtmp;

        rvalue = e2.EV.Vlong;
        freenode(e2);

        // If h--, convert to h++
        if (e.Eoper == OPpostdec)
            rvalue = -rvalue;

        regm_t retregs = mLSW & ~idxregs & *pretregs;
        if (!retregs)
            retregs = mLSW & ~idxregs;
        allocreg(cdb,&retregs,&lreg,TYint);

        // Can't use LES if the EA uses ES as a seg override
        if (*pretregs & mES && (cs.Iflags & CFSEG) != CFes)
        {   cs.Iop = 0xC4;
            retregs |= mES;
            getregs(cdb,mES|mCX);       // allocate ES
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);                       // LES lreg,EA
        }
        else
        {   cs.Iop = 0x8B;
            retregs |= mDX;
            getregs(cdb,mDX|mCX);
            cs.Irm |= modregrm(0,lreg,0);
            cdb.gen(&cs);                       // MOV lreg,EA
            NEWREG(cs.Irm,DX);
            getlvalue_msw(&cs);
            cdb.gen(&cs);                       // MOV DX,EA+2
            getlvalue_lsw(&cs);
        }

        // Allocate temporary register, rtmp
        mtmp = ALLREGS & ~mCX & ~idxregs & ~retregs;
        allocreg(cdb,&mtmp,&rtmp,TYint);

        movregconst(cdb,rtmp,rvalue >> 16,0);   // MOV rtmp,e2+2
        getregs(cdb,mtmp);
        cs.Iop = 0x81;
        NEWREG(cs.Irm,0);
        cs.IFL2 = FLconst;
        cs.IEV2.Vint = rvalue;
        cdb.gen(&cs);                           // ADD EA,e2
        code_orflag(cdb.last(),CFpsw);
        cdb.genc2(0x81,modregrm(3,2,rtmp),0);   // ADC rtmp,0
        genshift(cdb);                          // MOV CX,offset __AHSHIFT
        cdb.gen2(0xD3,modregrm(3,4,rtmp));      // SHL rtmp,CL
        cs.Iop = 0x01;
        NEWREG(cs.Irm,rtmp);                    // ADD EA+2,rtmp
        getlvalue_msw(&cs);
        cdb.gen(&cs);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (sz == 2 * REGSIZE)
    {
        regm_t retregs = allregs & ~idxregs & *pretregs;
        if ((retregs & mLSW) == 0)
            retregs |= mLSW & ~idxregs;
        if ((retregs & mMSW) == 0)
            retregs |= ALLREGS & mMSW;
        assert(retregs & mMSW && retregs & mLSW);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,tyml);
        uint sreg = findreglsw(retregs);
        cs.Iop = 0x8B;
        cs.Irm |= modregrm(0,sreg,0);
        cdb.gen(&cs);                   // MOV sreg,EA
        NEWREG(cs.Irm,reg);
        getlvalue_msw(&cs);
        cdb.gen(&cs);                   // MOV reg,EA+2
        cs.Iop = 0x81;
        cs.Irm &= ~cast(int)modregrm(0,7,0);     /* reg field = 0 for ADD        */
        if (op == OPpostdec)
            cs.Irm |= modregrm(0,5,0);  /* SUB                          */
        getlvalue_lsw(&cs);
        cs.IFL2 = FLconst;
        cs.IEV2.Vlong = e2.EV.Vlong;
        cdb.gen(&cs);                   // ADD/SUB EA,const
        code_orflag(cdb.last(),CFpsw);  // carry/borrow feeds the ADC/SBB below
        getlvalue_msw(&cs);
        if (op == OPpostinc)
            cs.Irm ^= modregrm(0,2,0);  /* ADC                          */
        else
            cs.Irm ^= modregrm(0,6,0);  /* SBB                          */
        // Upper half of the constant (a redundant "= 0" store used to precede this)
        cs.IEV2.Vlong = cast(targ_long)(e2.EV.Vullong >> (REGSIZE * 8));
        cdb.gen(&cs);                   // ADC/SBB EA+2,const>>(REGSIZE*8)
        freenode(e2);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else
    {
        assert(0);
    }
}


/*****************************************
 * Handler for elems that must never reach code generation:
 * prints the elem in debug builds, then halts with assert(0).
 */
void cderr(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
        elem_print(e);

    //printf("op = %d, %d\n", e.Eoper, OPstring);
    //printf("string = %p, len = %d\n", e.EV.ss.Vstring, e.EV.ss.Vstrlen);
    //printf("string = '%.*s'\n", cast(int)e.EV.ss.Vstrlen, e.EV.ss.Vstring);
    assert(0);
}

/*****************************************
 * Generate code for an elem whose left operand (e.EV.E1) carries
 * constructor/destructor/mark information and whose right operand
 * (e.EV.E2) is the value expression. Dispatches on e.EV.E1.Eoper;
 * any operator not listed for the active version is an error.
 */
void cdinfo(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    switch (e.EV.E1.Eoper)
    {
version (MARS)
{
        case OPdctor:
            // Evaluate the value first, then the dctor marker for effect only
            codelem(cdb,e.EV.E2,pretregs,false);
            regm_t retregs = 0;
            codelem(cdb,e.EV.E1,&retregs,false);
            break;
}
version (SCPP)
{
        case OPdtor:
            cdcomma(cdb,e,pretregs);
            break;
        case OPctor:
            codelem(cdb,e.EV.E2,pretregs,false);
            regm_t retregs = 0;
            codelem(cdb,e.EV.E1,&retregs,false);
            break;
        case OPmark:
            if (0 && config.exe == EX_WIN32)    // deliberately disabled path
            {
                const idx = except_index_get();
                except_mark();
                codelem(cdb,e.EV.E2,pretregs,false);
                if (config.exe == EX_WIN32 && idx != except_index_get())
                {   usednteh |= NTEHcleanup;
                    nteh_gensindex(cdb,idx - 1);
                }
                except_release();
                assert(idx == except_index_get());
            }
            else
            {
                // Bracket the value expression with mark/release escapes
                code cs = void;
                cs.Iop = ESCAPE | ESCmark;
                cs.Iflags = 0;
                cs.Irex = 0;
                cdb.gen(&cs);
                codelem(cdb,e.EV.E2,pretregs,false);
                cs.Iop = ESCAPE | ESCrelease;
                cdb.gen(&cs);
            }
            freenode(e.EV.E1);
            break;
}
        default:
            assert(0);
    }
}

/*******************************************
 * D constructor.
 * OPdctor
 *
 * Emits the ESCAPE | ESCdctor pseudo-instruction carrying e, followed by
 * the sindex store generated by nteh_gensindex(). Requires *pretregs == 0.
 */

void cddctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    /* Generate:
        ESCAPE | ESCdctor
        MOV     sindex[BP],index
     */
    usednteh |= EHcleanup;
    if (config.ehmethod == EHmethod.EH_WIN32)
    {   usednteh |= NTEHcleanup | NTEH_try;
        nteh_usevars();
    }
    assert(*pretregs == 0);
    code cs;
    cs.Iop = ESCAPE | ESCdctor;         // mark start of EH range
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL1 = FLctor;
    cs.IEV1.Vtor = e;
    cdb.gen(&cs);
    nteh_gensindex(cdb,0);              // the actual index will be patched in later
                                        // by except_fillInEHTable()
}

/*******************************************
 * D destructor.
* OPddtor
 *
 * Emits the ESCAPE | ESCddtor pseudo-instruction carrying e and then the
 * destructor code e.EV.E1. With DWARF exception handling the destructor
 * code is generated inline; otherwise it is emitted as a local
 * subroutine that is CALLed and then jumped over.
 * Requires *pretregs == 0.
 */

void cdddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    const bool dwarfEH = config.ehmethod == EHmethod.EH_DWARF;

    usednteh |= EHcleanup;
    if (config.ehmethod == EHmethod.EH_WIN32)   // never true when dwarfEH
    {
        usednteh |= NTEHcleanup | NTEH_try;
        nteh_usevars();
    }

    // Marks the end of the EH range (and, for DWARF, where the landing pad is)
    code esc;
    esc.Iop = ESCAPE | ESCddtor;
    esc.Iflags = 0;
    esc.Irex = 0;
    esc.IFL1 = FLdtor;
    esc.IEV1.Vtor = e;
    cdb.gen(&esc);

    if (!dwarfEH)
    {
        // the actual index will be patched in later by except_fillInEHTable()
        nteh_gensindex(cdb,0);
    }

    // Mark all registers as destroyed
    getregsNoSave(allregs);

    assert(*pretregs == 0);

    if (dwarfEH)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    /* Generate:
        ESCAPE | ESCddtor
        MOV     sindex[BP],index
        CALL    dtor
        JMP     L1
    Ldtor:
        ... e.EV.E1 ...
        RET
    L1: NOP
    */
    CodeBuilder dtorBody;
    dtorBody.ctor();
    codelem(dtorBody,e.EV.E1,pretregs,false);
    dtorBody.gen1(0xC3);                      // RET
    code *dtorEntry = dtorBody.finish();

    // Keep the stack aligned across the CALL when the target requires it
    int adjust = 0;
    if (STACKALIGN >= 16)
    {
        adjust = STACKALIGN - REGSIZE;
        cod3_stackadj(cdb, adjust);
    }
    calledafunc = 1;
    genjmp(cdb,0xE8,FLcode,cast(block *)dtorEntry);   // CALL Ldtor
    if (adjust)
        cod3_stackadj(cdb, -adjust);

    code *skip = gennop(null);

    genjmp(cdb,JMP,FLcode,cast(block *)skip);
    cdb.append(dtorBody);
    cdb.append(skip);
    return;
}


/*******************************************
 * C++ constructor.
*/

void cdctor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
version (SCPP)
{
    // Record that EH cleanup is in use (plus NT cleanup on Win32)
    usednteh |= (config.exe == EX_WIN32) ? (EHcleanup | NTEHcleanup) : EHcleanup;
    assert(*pretregs == 0);

    // Emit the constructor escape carrying the elem
    code esc = void;
    esc.Iop = ESCAPE | ESCctor;
    esc.Iflags = 0;
    esc.Irex = 0;
    esc.IFL1 = FLctor;
    esc.IEV1.Vtor = e;
    cdb.gen(&esc);
}
}

/******
 * OPdtor
 * Same shape as cdctor(), but emits ESCAPE | ESCdtor with FLdtor.
 * Generates nothing unless compiled for version SCPP.
 */
void cddtor(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
version (SCPP)
{
    // Record that EH cleanup is in use (plus NT cleanup on Win32)
    usednteh |= (config.exe == EX_WIN32) ? (EHcleanup | NTEHcleanup) : EHcleanup;
    assert(*pretregs == 0);

    // Emit the destructor escape carrying the elem
    code esc = void;
    esc.Iop = ESCAPE | ESCdtor;
    esc.Iflags = 0;
    esc.Irex = 0;
    esc.IFL1 = FLdtor;
    esc.IEV1.Vtor = e;
    cdb.gen(&esc);
}
}

/*****************************************
 * Intentionally generates no code.
 */
void cdmark(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
}

static if (!NTEXCEPTIONS)
{
/*****************************************
 * Placeholder used when NTEXCEPTIONS is off; must never be reached.
 */
void cdsetjmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(0);
}
}

/*****************************************
 * Evaluate the operand e.EV.E1 with no result requested.
 * Requires *pretregs == 0.
 */

void cdvoid(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);
    codelem(cdb,e.EV.E1,pretregs,false);
}

/*****************************************
 * Emit a single halting instruction: UD2 when the target CPU is at least
 * TARGET_80286, otherwise INT3. Requires *pretregs == 0.
 */

void cdhalt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(*pretregs == 0);
    const haltOp = (config.target_cpu >= TARGET_80286) ? UD2 : INT3;
    cdb.gen1(haltOp);
}

}