1 /** 2 * Compiler implementation of the 3 * $(LINK2 http://www.dlang.org, D programming language). 4 * 5 * Mostly code generation for assignment operators. 6 * 7 * Copyright: Copyright (C) 1985-1998 by Symantec 8 * Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved 9 * Authors: $(LINK2 http://www.digitalmars.com, Walter Bright) 10 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 11 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod4.d, backend/cod4.d) 12 * Documentation: https://dlang.org/phobos/dmd_backend_cod4.html 13 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod4.d 14 */ 15 16 module dmd.backend.cod4; 17 18 version (SCPP) 19 version = COMPILE; 20 version (MARS) 21 version = COMPILE; 22 23 version (COMPILE) 24 { 25 26 import core.stdc.stdio; 27 import core.stdc.stdlib; 28 import core.stdc.string; 29 30 import dmd.backend.cc; 31 import dmd.backend.cdef; 32 import dmd.backend.code; 33 import dmd.backend.code_x86; 34 import dmd.backend.codebuilder; 35 import dmd.backend.mem; 36 import dmd.backend.el; 37 import dmd.backend.global; 38 import dmd.backend.oper; 39 import dmd.backend.ty; 40 import dmd.backend.evalu8 : el_toldoubled; 41 import dmd.backend.xmm; 42 43 extern (C++): 44 45 nothrow: 46 47 int REGSIZE(); 48 49 extern __gshared CGstate cgstate; 50 extern __gshared bool[FLMAX] datafl; 51 52 private extern (D) uint mask(uint m) { return 1 << m; } 53 54 /* AX,CX,DX,BX */ 55 __gshared const reg_t[4] dblreg = [ BX,DX,NOREG,CX ]; 56 57 // from divcoeff.c 58 extern (C) 59 { 60 bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost); 61 bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost); 62 } 63 64 /******************************* 65 * Return number of times symbol s appears in tree e. 66 */ 67 68 private int intree(Symbol *s,elem *e) 69 { 70 if (!OTleaf(e.Eoper)) 71 return intree(s,e.EV.E1) + (OTbinary(e.Eoper) ? 
intree(s,e.EV.E2) : 0); 72 return e.Eoper == OPvar && e.EV.Vsym == s; 73 } 74 75 /*********************************** 76 * Determine if expression e can be evaluated directly into register 77 * variable s. 78 * Have to be careful about things like x=x+x+x, and x=a+x. 79 * Returns: 80 * !=0 can 81 * 0 can't 82 */ 83 84 int doinreg(Symbol *s, elem *e) 85 { 86 int in_ = 0; 87 OPER op; 88 89 L1: 90 op = e.Eoper; 91 if (op == OPind || 92 OTcall(op) || 93 OTleaf(op) || 94 (in_ = intree(s,e)) == 0 || 95 (OTunary(op) && OTleaf(e.EV.E1.Eoper)) 96 ) 97 return 1; 98 if (in_ == 1) 99 { 100 switch (op) 101 { 102 case OPadd: 103 case OPmin: 104 case OPand: 105 case OPor: 106 case OPxor: 107 case OPshl: 108 case OPmul: 109 if (!intree(s,e.EV.E2)) 110 { 111 e = e.EV.E1; 112 goto L1; 113 } 114 break; 115 116 default: 117 break; 118 } 119 } 120 return 0; 121 } 122 123 /**************************** 124 * Return code for saving common subexpressions if EA 125 * turns out to be a register. 126 * This is called just before modifying an EA. 127 */ 128 129 void modEA(ref CodeBuilder cdb,code *c) 130 { 131 if ((c.Irm & 0xC0) == 0xC0) // addressing mode refers to a register 132 { 133 reg_t reg = c.Irm & 7; 134 if (c.Irex & REX_B) 135 { reg |= 8; 136 assert(I64); 137 } 138 getregs(cdb,mask(reg)); 139 } 140 } 141 142 static if (TARGET_WINDOS) 143 { 144 // This code is for CPUs that do not support the 8087 145 146 /**************************** 147 * Gen code for op= for doubles. 
148 */ 149 150 private void opassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs,OPER op) 151 { 152 static immutable uint[OPdivass - OPpostinc + 1] clibtab = 153 /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass */ 154 [ CLIB.dadd, CLIB.dsub, cast(uint)-1, CLIB.dadd,CLIB.dsub,CLIB.dmul,CLIB.ddiv ]; 155 156 if (config.inline8087) 157 { 158 opass87(cdb,e,pretregs); 159 return; 160 } 161 162 code cs; 163 regm_t retregs2,retregs,idxregs; 164 165 uint clib = clibtab[op - OPpostinc]; 166 elem *e1 = e.EV.E1; 167 tym_t tym = tybasic(e1.Ety); 168 getlvalue(cdb,&cs,e1,DOUBLEREGS | mBX | mCX); 169 170 if (tym == TYfloat) 171 { 172 clib += CLIB.fadd - CLIB.dadd; /* convert to float operation */ 173 174 // Load EA into FLOATREGS 175 getregs(cdb,FLOATREGS); 176 cs.Iop = LOD; 177 cs.Irm |= modregrm(0,AX,0); 178 cdb.gen(&cs); 179 180 if (!I32) 181 { 182 cs.Irm |= modregrm(0,DX,0); 183 getlvalue_msw(&cs); 184 cdb.gen(&cs); 185 getlvalue_lsw(&cs); 186 187 } 188 retregs2 = FLOATREGS2; 189 idxregs = FLOATREGS | idxregm(&cs); 190 retregs = FLOATREGS; 191 } 192 else 193 { 194 if (I32) 195 { 196 // Load EA into DOUBLEREGS 197 getregs(cdb,DOUBLEREGS_32); 198 cs.Iop = LOD; 199 cs.Irm |= modregrm(0,AX,0); 200 cdb.gen(&cs); 201 cs.Irm |= modregrm(0,DX,0); 202 getlvalue_msw(&cs); 203 cdb.gen(&cs); 204 getlvalue_lsw(&cs); 205 206 retregs2 = DOUBLEREGS2_32; 207 idxregs = DOUBLEREGS_32 | idxregm(&cs); 208 } 209 else 210 { 211 // Push EA onto stack 212 cs.Iop = 0xFF; 213 cs.Irm |= modregrm(0,6,0); 214 cs.IEV1.Voffset += DOUBLESIZE - REGSIZE; 215 cdb.gen(&cs); 216 getlvalue_lsw(&cs); 217 cdb.gen(&cs); 218 getlvalue_lsw(&cs); 219 cdb.gen(&cs); 220 getlvalue_lsw(&cs); 221 cdb.gen(&cs); 222 stackpush += DOUBLESIZE; 223 224 retregs2 = DOUBLEREGS_16; 225 idxregs = idxregm(&cs); 226 } 227 retregs = DOUBLEREGS; 228 } 229 230 if ((cs.Iflags & CFSEG) == CFes) 231 idxregs |= mES; 232 cgstate.stackclean++; 233 scodelem(cdb,e.EV.E2,&retregs2,idxregs,false); 234 cgstate.stackclean--; 235 
callclib(cdb,e,clib,&retregs,0); 236 if (e1.Ecount) 237 cssave(e1,retregs,!OTleaf(e1.Eoper)); // if lvalue is a CSE 238 freenode(e1); 239 cs.Iop = STO; // MOV EA,DOUBLEREGS 240 fltregs(cdb,&cs,tym); 241 fixresult(cdb,e,retregs,pretregs); 242 } 243 244 /**************************** 245 * Gen code for OPnegass for doubles. 246 */ 247 248 private void opnegassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 249 { 250 if (config.inline8087) 251 { 252 cdnegass87(cdb,e,pretregs); 253 return; 254 } 255 elem *e1 = e.EV.E1; 256 tym_t tym = tybasic(e1.Ety); 257 int sz = _tysize[tym]; 258 code cs; 259 260 getlvalue(cdb,&cs,e1,*pretregs ? DOUBLEREGS | mBX | mCX : 0); 261 modEA(cdb,&cs); 262 cs.Irm |= modregrm(0,6,0); 263 cs.Iop = 0x80; 264 cs.IEV1.Voffset += sz - 1; 265 cs.IFL2 = FLconst; 266 cs.IEV2.Vuns = 0x80; 267 cdb.gen(&cs); // XOR 7[EA],0x80 268 if (tycomplex(tym)) 269 { 270 cs.IEV1.Voffset -= sz / 2; 271 cdb.gen(&cs); // XOR 7[EA],0x80 272 } 273 274 regm_t retregs; 275 if (*pretregs || e1.Ecount) 276 { 277 cs.IEV1.Voffset -= sz - 1; 278 279 if (tym == TYfloat) 280 { 281 // Load EA into FLOATREGS 282 getregs(cdb,FLOATREGS); 283 cs.Iop = LOD; 284 NEWREG(cs.Irm, AX); 285 cdb.gen(&cs); 286 287 if (!I32) 288 { 289 NEWREG(cs.Irm, DX); 290 getlvalue_msw(&cs); 291 cdb.gen(&cs); 292 getlvalue_lsw(&cs); 293 294 } 295 retregs = FLOATREGS; 296 } 297 else 298 { 299 if (I32) 300 { 301 // Load EA into DOUBLEREGS 302 getregs(cdb,DOUBLEREGS_32); 303 cs.Iop = LOD; 304 cs.Irm &= ~cast(uint)modregrm(0,7,0); 305 cs.Irm |= modregrm(0,AX,0); 306 cdb.gen(&cs); 307 cs.Irm |= modregrm(0,DX,0); 308 getlvalue_msw(&cs); 309 cdb.gen(&cs); 310 getlvalue_lsw(&cs); 311 } 312 else 313 { 314 static if (1) 315 { 316 cs.Iop = LOD; 317 fltregs(cdb,&cs,TYdouble); // MOV DOUBLEREGS, EA 318 } 319 else 320 { 321 // Push EA onto stack 322 cs.Iop = 0xFF; 323 cs.Irm |= modregrm(0,6,0); 324 cs.IEV1.Voffset += DOUBLESIZE - REGSIZE; 325 cdb.gen(&cs); 326 cs.IEV1.Voffset -= REGSIZE; 327 cdb.gen(&cs); 328 
cs.IEV1.Voffset -= REGSIZE; 329 cdb.gen(&cs); 330 cs.IEV1.Voffset -= REGSIZE; 331 cdb.gen(&cs); 332 stackpush += DOUBLESIZE; 333 } 334 } 335 retregs = DOUBLEREGS; 336 } 337 if (e1.Ecount) 338 cssave(e1,retregs,!OTleaf(e1.Eoper)); /* if lvalue is a CSE */ 339 } 340 else 341 { 342 retregs = 0; 343 assert(e1.Ecount == 0); 344 } 345 346 freenode(e1); 347 fixresult(cdb,e,retregs,pretregs); 348 } 349 } 350 351 352 353 /************************ 354 * Generate code for an assignment. 355 */ 356 357 void cdeq(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 358 { 359 tym_t tymll; 360 reg_t reg; 361 code cs; 362 elem *e11; 363 bool regvar; // true means evaluate into register variable 364 regm_t varregm; 365 reg_t varreg; 366 targ_int postinc; 367 368 //printf("cdeq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 369 elem *e1 = e.EV.E1; 370 elem *e2 = e.EV.E2; 371 int e2oper = e2.Eoper; 372 tym_t tyml = tybasic(e1.Ety); // type of lvalue 373 regm_t retregs = *pretregs; 374 375 if (tyxmmreg(tyml) && config.fpxmmregs) 376 { 377 xmmeq(cdb, e, CMP, e1, e2, pretregs); 378 return; 379 } 380 381 if (tyfloating(tyml) && config.inline8087) 382 { 383 if (tycomplex(tyml)) 384 { 385 complex_eq87(cdb, e, pretregs); 386 return; 387 } 388 389 if (!(retregs == 0 && 390 (e2oper == OPconst || e2oper == OPvar || e2oper == OPind)) 391 ) 392 { 393 eq87(cdb,e,pretregs); 394 return; 395 } 396 if (config.target_cpu >= TARGET_PentiumPro && 397 (e2oper == OPvar || e2oper == OPind) 398 ) 399 { 400 eq87(cdb,e,pretregs); 401 return; 402 } 403 if (tyml == TYldouble || tyml == TYildouble) 404 { 405 eq87(cdb,e,pretregs); 406 return; 407 } 408 } 409 410 uint sz = _tysize[tyml]; // # of bytes to transfer 411 assert(cast(int)sz > 0); 412 413 if (retregs == 0) // if no return value 414 { 415 int fl; 416 417 /* If registers are tight, and we might need them for the lvalue, 418 * prefer to not use them for the rvalue 419 */ 420 bool plenty = true; 421 if (e1.Eoper == OPind) 422 { 423 /* Will need 1 register 
for evaluation, +2 registers for 424 * e1's addressing mode 425 */ 426 regm_t m = allregs & ~regcon.mvar; // mask of non-register variables 427 m &= m - 1; // clear least significant bit 428 m &= m - 1; // clear least significant bit 429 plenty = m != 0; // at least 3 registers 430 } 431 432 if ((e2oper == OPconst || // if rvalue is a constant 433 e2oper == OPrelconst && 434 !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) && 435 ((fl = el_fl(e2)) == FLdata || 436 fl==FLudata || fl == FLextern) 437 && !(e2.EV.Vsym.ty() & mTYcs) 438 ) && 439 !(evalinregister(e2) && plenty) && 440 !e1.Ecount) // and no CSE headaches 441 { 442 // Look for special case of (*p++ = ...), where p is a register variable 443 if (e1.Eoper == OPind && 444 ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) && 445 e11.EV.E1.Eoper == OPvar && 446 e11.EV.E1.EV.Vsym.Sfl == FLreg && 447 (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS) 448 ) 449 { 450 Symbol *s = e11.EV.E1.EV.Vsym; 451 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 452 { 453 regcon.params &= ~s.Spregm(); 454 } 455 postinc = e11.EV.E2.EV.Vint; 456 if (e11.Eoper == OPpostdec) 457 postinc = -postinc; 458 getlvalue(cdb,&cs,e1,RMstore); 459 freenode(e11.EV.E2); 460 } 461 else 462 { 463 postinc = 0; 464 getlvalue(cdb,&cs,e1,RMstore); 465 466 if (e2oper == OPconst && 467 config.flags4 & CFG4speed && 468 (config.target_cpu == TARGET_Pentium || 469 config.target_cpu == TARGET_PentiumMMX) && 470 (cs.Irm & 0xC0) == 0x80 471 ) 472 { 473 if (I64 && sz == 8 && e2.EV.Vpointer) 474 { 475 // MOV reg,imm64 476 // MOV EA,reg 477 regm_t rregm = allregs & ~idxregm(&cs); 478 reg_t regx; 479 regwithvalue(cdb,rregm,e2.EV.Vpointer,®x,64); 480 cs.Iop = STO; 481 cs.Irm |= modregrm(0,regx & 7,0); 482 if (regx & 8) 483 cs.Irex |= REX_R; 484 cdb.gen(&cs); 485 freenode(e2); 486 goto Lp; 487 } 488 if ((sz == REGSIZE || (I64 && sz == 4)) && e2.EV.Vint) 489 { 490 // MOV reg,imm 491 // MOV EA,reg 492 regm_t rregm = allregs & ~idxregm(&cs); 
493 reg_t regx; 494 regwithvalue(cdb,rregm,e2.EV.Vint,®x,0); 495 cs.Iop = STO; 496 cs.Irm |= modregrm(0,regx & 7,0); 497 if (regx & 8) 498 cs.Irex |= REX_R; 499 cdb.gen(&cs); 500 freenode(e2); 501 goto Lp; 502 } 503 if (sz == 2 * REGSIZE && e2.EV.Vllong == 0) 504 { 505 // MOV reg,imm 506 // MOV EA,reg 507 // MOV EA+2,reg 508 regm_t rregm = getscratch() & ~idxregm(&cs); 509 if (rregm) 510 { 511 reg_t regx; 512 regwithvalue(cdb,rregm,e2.EV.Vint,®x,0); 513 cs.Iop = STO; 514 cs.Irm |= modregrm(0,regx,0); 515 cdb.gen(&cs); 516 getlvalue_msw(&cs); 517 cdb.gen(&cs); 518 freenode(e2); 519 goto Lp; 520 } 521 } 522 } 523 } 524 525 // If loading result into a register 526 if ((cs.Irm & 0xC0) == 0xC0) 527 { 528 modEA(cdb,&cs); 529 if (sz == 2 * REGSIZE && cs.IFL1 == FLreg) 530 getregs(cdb,cs.IEV1.Vsym.Sregm); 531 } 532 cs.Iop = (sz == 1) ? 0xC6 : 0xC7; 533 534 if (e2oper == OPrelconst) 535 { 536 cs.IEV2.Voffset = e2.EV.Voffset; 537 cs.IFL2 = cast(ubyte)fl; 538 cs.IEV2.Vsym = e2.EV.Vsym; 539 cs.Iflags |= CFoff; 540 cdb.gen(&cs); // MOV EA,&variable 541 if (I64 && sz == 8) 542 code_orrex(cdb.last(), REX_W); 543 if (sz > REGSIZE) 544 { 545 cs.Iop = 0x8C; 546 getlvalue_msw(&cs); 547 cs.Irm |= modregrm(0,3,0); 548 cdb.gen(&cs); // MOV EA+2,DS 549 } 550 } 551 else 552 { 553 assert(e2oper == OPconst); 554 cs.IFL2 = FLconst; 555 targ_size_t *p = cast(targ_size_t *) &(e2.EV); 556 cs.IEV2.Vsize_t = *p; 557 // Look for loading a register variable 558 if ((cs.Irm & 0xC0) == 0xC0) 559 { 560 reg_t regx = cs.Irm & 7; 561 562 if (cs.Irex & REX_B) 563 regx |= 8; 564 if (I64 && sz == 8) 565 movregconst(cdb,regx,*p,64); 566 else 567 movregconst(cdb,regx,*p,1 ^ (cs.Iop & 1)); 568 if (sz == 2 * REGSIZE) 569 { getlvalue_msw(&cs); 570 if (REGSIZE == 2) 571 movregconst(cdb,cs.Irm & 7,(cast(ushort *)p)[1],0); 572 else if (REGSIZE == 4) 573 movregconst(cdb,cs.Irm & 7,(cast(uint *)p)[1],0); 574 else if (REGSIZE == 8) 575 movregconst(cdb,cs.Irm & 7,p[1],0); 576 else 577 assert(0); 578 } 579 } 580 else if 
(I64 && sz == 8 && *p >= 0x80000000) 581 { // Use 64 bit MOV, as the 32 bit one gets sign extended 582 // MOV reg,imm64 583 // MOV EA,reg 584 regm_t rregm = allregs & ~idxregm(&cs); 585 reg_t regx; 586 regwithvalue(cdb,rregm,*p,®x,64); 587 cs.Iop = STO; 588 cs.Irm |= modregrm(0,regx & 7,0); 589 if (regx & 8) 590 cs.Irex |= REX_R; 591 cdb.gen(&cs); 592 } 593 else 594 { 595 int off = sz; 596 do 597 { int regsize = REGSIZE; 598 if (off >= 4 && I16 && config.target_cpu >= TARGET_80386) 599 { 600 regsize = 4; 601 cs.Iflags |= CFopsize; // use opsize to do 32 bit operation 602 } 603 else if (I64 && sz == 16 && *p >= 0x80000000) 604 { 605 regm_t rregm = allregs & ~idxregm(&cs); 606 reg_t regx; 607 regwithvalue(cdb,rregm,*p,®x,64); 608 cs.Iop = STO; 609 cs.Irm |= modregrm(0,regx & 7,0); 610 if (regx & 8) 611 cs.Irex |= REX_R; 612 } 613 else 614 { 615 regm_t retregsx = (sz == 1) ? BYTEREGS : allregs; 616 reg_t regx; 617 if (reghasvalue(retregsx,*p,®x)) 618 { 619 cs.Iop = (cs.Iop & 1) | 0x88; 620 cs.Irm |= modregrm(0,regx & 7,0); // MOV EA,regx 621 if (regx & 8) 622 cs.Irex |= REX_R; 623 if (I64 && sz == 1 && regx >= 4) 624 cs.Irex |= REX; 625 } 626 if (!I16 && off == 2) // if 16 bit operand 627 cs.Iflags |= CFopsize; 628 if (I64 && sz == 8) 629 cs.Irex |= REX_W; 630 } 631 cdb.gen(&cs); // MOV EA,const 632 633 p = cast(targ_size_t *)(cast(char *) p + regsize); 634 cs.Iop = (cs.Iop & 1) | 0xC6; 635 cs.Irm &= cast(ubyte)~cast(int)modregrm(0,7,0); 636 cs.Irex &= ~REX_R; 637 cs.IEV1.Voffset += regsize; 638 cs.IEV2.Vint = cast(int)*p; 639 off -= regsize; 640 } while (off > 0); 641 } 642 } 643 freenode(e2); 644 goto Lp; 645 } 646 retregs = allregs; // pick a reg, any reg 647 if (sz == 2 * REGSIZE) 648 retregs &= ~mBP; // BP cannot be used for register pair 649 } 650 if (retregs == mPSW) 651 { 652 retregs = allregs; 653 if (sz == 2 * REGSIZE) 654 retregs &= ~mBP; // BP cannot be used for register pair 655 } 656 cs.Iop = STO; 657 if (sz == 1) // must have byte regs 658 { 659 cs.Iop 
= 0x88; 660 retregs &= BYTEREGS; 661 if (!retregs) 662 retregs = BYTEREGS; 663 } 664 else if (retregs & mES && 665 ( 666 (e1.Eoper == OPind && 667 ((tymll = tybasic(e1.EV.E1.Ety)) == TYfptr || tymll == TYhptr)) || 668 (e1.Eoper == OPvar && e1.EV.Vsym.Sfl == FLfardata) 669 ) 670 ) 671 // getlvalue() needs ES, so we can't return it 672 retregs = allregs; // no conflicts with ES 673 else if (tyml == TYdouble || tyml == TYdouble_alias || retregs & mST0) 674 retregs = DOUBLEREGS; 675 676 regvar = false; 677 varregm = 0; 678 if (config.flags4 & CFG4optimized) 679 { 680 // Be careful of cases like (x = x+x+x). We cannot evaluate in 681 // x if x is in a register. 682 if (isregvar(e1,&varregm,&varreg) && // if lvalue is register variable 683 doinreg(e1.EV.Vsym,e2) && // and we can compute directly into it 684 !(sz == 1 && e1.EV.Voffset == 1) 685 ) 686 { 687 regvar = true; 688 retregs = varregm; 689 reg = varreg; // evaluate directly in target register 690 if (tysize(e1.Ety) == REGSIZE && 691 tysize(e1.EV.Vsym.Stype.Tty) == 2 * REGSIZE) 692 { 693 if (e1.EV.Voffset) 694 retregs &= mMSW; 695 else 696 retregs &= mLSW; 697 reg = findreg(retregs); 698 } 699 } 700 } 701 if (*pretregs & mPSW && OTleaf(e1.Eoper)) // if evaluating e1 couldn't change flags 702 { // Be careful that this lines up with jmpopcode() 703 retregs |= mPSW; 704 *pretregs &= ~mPSW; 705 } 706 scodelem(cdb,e2,&retregs,0,true); // get rvalue 707 708 // Look for special case of (*p++ = ...), where p is a register variable 709 if (e1.Eoper == OPind && 710 ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) && 711 e11.EV.E1.Eoper == OPvar && 712 e11.EV.E1.EV.Vsym.Sfl == FLreg && 713 (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS) 714 ) 715 { 716 Symbol *s = e11.EV.E1.EV.Vsym; 717 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 718 { 719 regcon.params &= ~s.Spregm(); 720 } 721 722 postinc = e11.EV.E2.EV.Vint; 723 if (e11.Eoper == OPpostdec) 724 postinc = -postinc; 725 getlvalue(cdb,&cs,e1,RMstore | 
retregs); 726 freenode(e11.EV.E2); 727 } 728 else 729 { 730 postinc = 0; 731 getlvalue(cdb,&cs,e1,RMstore | retregs); // get lvalue (cl == null if regvar) 732 } 733 734 getregs(cdb,varregm); 735 736 assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes)); 737 if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES) 738 { 739 reg = findreglsw(retregs); 740 cs.Irm |= modregrm(0,reg,0); 741 cdb.gen(&cs); // MOV EA,reg 742 getlvalue_msw(&cs); // point to where segment goes 743 cs.Iop = 0x8C; 744 NEWREG(cs.Irm,0); 745 cdb.gen(&cs); // MOV EA+2,ES 746 } 747 else 748 { 749 if (!I16) 750 { 751 reg = findreg(retregs & 752 ((sz > REGSIZE) ? mBP | mLSW : mBP | ALLREGS)); 753 cs.Irm |= modregrm(0,reg & 7,0); 754 if (reg & 8) 755 cs.Irex |= REX_R; 756 for (; true; sz -= REGSIZE) 757 { 758 // Do not generate mov from register onto itself 759 if (regvar && reg == ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0))) 760 break; 761 if (sz == 2) // if 16 bit operand 762 cs.Iflags |= CFopsize; 763 else if (sz == 1 && reg >= 4) 764 cs.Irex |= REX; 765 cdb.gen(&cs); // MOV EA+offset,reg 766 if (sz <= REGSIZE) 767 break; 768 getlvalue_msw(&cs); 769 reg = findregmsw(retregs); 770 code_newreg(&cs, reg); 771 } 772 } 773 else 774 { 775 if (sz > REGSIZE) 776 cs.IEV1.Voffset += sz - REGSIZE; // 0,2,6 777 reg = findreg(retregs & 778 (sz > REGSIZE ? 
mMSW : ALLREGS)); 779 if (tyml == TYdouble || tyml == TYdouble_alias) 780 reg = AX; 781 cs.Irm |= modregrm(0,reg,0); 782 // Do not generate mov from register onto itself 783 if (!regvar || reg != (cs.Irm & 7)) 784 for (; true; sz -= REGSIZE) // 1,2,4 785 { 786 cdb.gen(&cs); // MOV EA+offset,reg 787 if (sz <= REGSIZE) 788 break; 789 cs.IEV1.Voffset -= REGSIZE; 790 if (tyml == TYdouble || tyml == TYdouble_alias) 791 reg = dblreg[reg]; 792 else 793 reg = findreglsw(retregs); 794 NEWREG(cs.Irm,reg); 795 } 796 } 797 } 798 if (e1.Ecount || // if lvalue is a CSE or 799 regvar) // rvalue can't be a CSE 800 { 801 getregs_imm(cdb,retregs); // necessary if both lvalue and 802 // rvalue are CSEs (since a reg 803 // can hold only one e at a time) 804 cssave(e1,retregs,!OTleaf(e1.Eoper)); // if lvalue is a CSE 805 } 806 807 fixresult(cdb,e,retregs,pretregs); 808 Lp: 809 if (postinc) 810 { 811 reg_t ireg = findreg(idxregm(&cs)); 812 if (*pretregs & mPSW) 813 { // Use LEA to avoid touching the flags 814 uint rm = cs.Irm & 7; 815 if (cs.Irex & REX_B) 816 rm |= 8; 817 cdb.genc1(LEA,buildModregrm(2,ireg,rm),FLconst,postinc); 818 if (tysize(e11.EV.E1.Ety) == 8) 819 code_orrex(cdb.last(), REX_W); 820 } 821 else if (I64) 822 { 823 cdb.genc2(0x81,modregrmx(3,0,ireg),postinc); 824 if (tysize(e11.EV.E1.Ety) == 8) 825 code_orrex(cdb.last(), REX_W); 826 } 827 else 828 { 829 if (postinc == 1) 830 cdb.gen1(0x40 + ireg); // INC ireg 831 else if (postinc == -cast(targ_int)1) 832 cdb.gen1(0x48 + ireg); // DEC ireg 833 else 834 { 835 cdb.genc2(0x81,modregrm(3,0,ireg),postinc); 836 } 837 } 838 } 839 freenode(e1); 840 } 841 842 843 /************************ 844 * Generate code for += -= &= |= ^= negass 845 */ 846 847 void cdaddass(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 848 { 849 //printf("cdaddass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs)); 850 OPER op = e.Eoper; 851 regm_t retregs = 0; 852 uint reverse = 0; 853 elem *e1 = e.EV.E1; 854 tym_t tyml = tybasic(e1.Ety); // type of lvalue 855 
int sz = _tysize[tyml]; 856 int isbyte = (sz == 1); // 1 for byte operation, else 0 857 858 // See if evaluate in XMM registers 859 if (config.fpxmmregs && tyxmmreg(tyml) && op != OPnegass && !(*pretregs & mST0)) 860 { 861 xmmopass(cdb,e,pretregs); 862 return; 863 } 864 865 if (tyfloating(tyml)) 866 { 867 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 868 { 869 if (op == OPnegass) 870 cdnegass87(cdb,e,pretregs); 871 else 872 opass87(cdb,e,pretregs); 873 } 874 else 875 { 876 if (op == OPnegass) 877 opnegassdbl(cdb,e,pretregs); 878 else 879 opassdbl(cdb,e,pretregs,op); 880 } 881 return; 882 } 883 uint opsize = (I16 && tylong(tyml) && config.target_cpu >= TARGET_80386) 884 ? CFopsize : 0; 885 uint cflags = 0; 886 regm_t forccs = *pretregs & mPSW; // return result in flags 887 regm_t forregs = *pretregs & ~mPSW; // return result in regs 888 // true if we want the result in a register 889 uint wantres = forregs || (e1.Ecount && !OTleaf(e1.Eoper)); 890 891 reg_t reg; 892 uint op1,op2,mode; 893 code cs; 894 elem *e2; 895 regm_t varregm; 896 reg_t varreg; 897 uint jop; 898 899 900 switch (op) // select instruction opcodes 901 { 902 case OPpostinc: op = OPaddass; // i++ => += 903 goto case OPaddass; 904 905 case OPaddass: op1 = 0x01; op2 = 0x11; 906 cflags = CFpsw; 907 mode = 0; break; // ADD, ADC 908 909 case OPpostdec: op = OPminass; // i-- => -= 910 goto case OPminass; 911 912 case OPminass: op1 = 0x29; op2 = 0x19; 913 cflags = CFpsw; 914 mode = 5; break; // SUB, SBC 915 916 case OPandass: op1 = op2 = 0x21; 917 mode = 4; break; // AND, AND 918 919 case OPorass: op1 = op2 = 0x09; 920 mode = 1; break; // OR , OR 921 922 case OPxorass: op1 = op2 = 0x31; 923 mode = 6; break; // XOR, XOR 924 925 case OPnegass: op1 = 0xF7; // NEG 926 break; 927 928 default: 929 assert(0); 930 } 931 op1 ^= isbyte; // bit 0 is 0 for byte operation 932 933 if (op == OPnegass) 934 { 935 getlvalue(cdb,&cs,e1,0); 936 
modEA(cdb,&cs); 937 cs.Irm |= modregrm(0,3,0); 938 cs.Iop = op1; 939 switch (_tysize[tyml]) 940 { 941 case CHARSIZE: 942 cdb.gen(&cs); 943 break; 944 945 case SHORTSIZE: 946 cdb.gen(&cs); 947 if (!I16 && *pretregs & mPSW) 948 cdb.last().Iflags |= CFopsize | CFpsw; 949 break; 950 951 case LONGSIZE: 952 if (!I16 || opsize) 953 { cdb.gen(&cs); 954 cdb.last().Iflags |= opsize; 955 break; 956 } 957 neg_2reg: 958 getlvalue_msw(&cs); 959 cdb.gen(&cs); // NEG EA+2 960 getlvalue_lsw(&cs); 961 cdb.gen(&cs); // NEG EA 962 code_orflag(cdb.last(),CFpsw); 963 cs.Iop = 0x81; 964 getlvalue_msw(&cs); 965 cs.IFL2 = FLconst; 966 cs.IEV2.Vuns = 0; 967 cdb.gen(&cs); // SBB EA+2,0 968 break; 969 970 case LLONGSIZE: 971 if (I16) 972 assert(0); // not implemented yet 973 if (I32) 974 goto neg_2reg; 975 cdb.gen(&cs); 976 break; 977 978 default: 979 assert(0); 980 } 981 forccs = 0; // flags already set by NEG 982 *pretregs &= ~mPSW; 983 } 984 else if ((e2 = e.EV.E2).Eoper == OPconst && // if rvalue is a const 985 el_signx32(e2) && 986 // Don't evaluate e2 in register if we can use an INC or DEC 987 (((sz <= REGSIZE || tyfv(tyml)) && 988 (op == OPaddass || op == OPminass) && 989 (el_allbits(e2, 1) || el_allbits(e2, -1)) 990 ) || 991 (!evalinregister(e2) 992 && tyml != TYhptr 993 ) 994 ) 995 ) 996 { 997 getlvalue(cdb,&cs,e1,0); 998 modEA(cdb,&cs); 999 cs.IFL2 = FLconst; 1000 cs.IEV2.Vsize_t = e2.EV.Vint; 1001 if (sz <= REGSIZE || tyfv(tyml) || opsize) 1002 { 1003 targ_int i = cs.IEV2.Vint; 1004 1005 // Handle shortcuts. Watch out for if result has 1006 // to be in flags. 1007 1008 if (reghasvalue(isbyte ? 
BYTEREGS : ALLREGS,i,®) && i != 1 && i != -1 && 1009 !opsize) 1010 { 1011 cs.Iop = op1; 1012 cs.Irm |= modregrm(0,reg & 7,0); 1013 if (I64) 1014 { if (isbyte && reg >= 4) 1015 cs.Irex |= REX; 1016 if (reg & 8) 1017 cs.Irex |= REX_R; 1018 } 1019 } 1020 else 1021 { 1022 cs.Iop = 0x81; 1023 cs.Irm |= modregrm(0,mode,0); 1024 switch (op) 1025 { 1026 case OPminass: // convert to += 1027 cs.Irm ^= modregrm(0,5,0); 1028 i = -i; 1029 cs.IEV2.Vsize_t = i; 1030 goto case OPaddass; 1031 1032 case OPaddass: 1033 if (i == 1) // INC EA 1034 goto L1; 1035 else if (i == -1) // DEC EA 1036 { cs.Irm |= modregrm(0,1,0); 1037 L1: cs.Iop = 0xFF; 1038 } 1039 break; 1040 1041 default: 1042 break; 1043 } 1044 cs.Iop ^= isbyte; // for byte operations 1045 } 1046 cs.Iflags |= opsize; 1047 if (forccs) 1048 cs.Iflags |= CFpsw; 1049 else if (!I16 && cs.Iflags & CFopsize) 1050 { 1051 switch (op) 1052 { case OPorass: 1053 case OPxorass: 1054 cs.IEV2.Vsize_t &= 0xFFFF; 1055 cs.Iflags &= ~CFopsize; // don't worry about MSW 1056 break; 1057 1058 case OPandass: 1059 cs.IEV2.Vsize_t |= ~0xFFFFL; 1060 cs.Iflags &= ~CFopsize; // don't worry about MSW 1061 break; 1062 1063 case OPminass: 1064 case OPaddass: 1065 static if (1) 1066 { 1067 if ((cs.Irm & 0xC0) == 0xC0) // EA is register 1068 cs.Iflags &= ~CFopsize; 1069 } 1070 else 1071 { 1072 if ((cs.Irm & 0xC0) == 0xC0 && // EA is register and 1073 e1.Eoper == OPind) // not a register var 1074 cs.Iflags &= ~CFopsize; 1075 } 1076 break; 1077 1078 default: 1079 assert(0); 1080 } 1081 } 1082 1083 // For scheduling purposes, we wish to replace: 1084 // OP EA 1085 // with: 1086 // MOV reg,EA 1087 // OP reg 1088 // MOV EA,reg 1089 if (forregs && sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 && 1090 (config.target_cpu == TARGET_Pentium || 1091 config.target_cpu == TARGET_PentiumMMX) && 1092 config.flags4 & CFG4speed) 1093 { 1094 regm_t sregm; 1095 code cs2; 1096 1097 // Determine which registers to use 1098 sregm = allregs & ~idxregm(&cs); 1099 if (isbyte) 1100 
sregm &= BYTEREGS; 1101 if (sregm & forregs) 1102 sregm &= forregs; 1103 1104 allocreg(cdb,&sregm,®,tyml); // allocate register 1105 1106 cs2 = cs; 1107 cs2.Iflags &= ~CFpsw; 1108 cs2.Iop = LOD ^ isbyte; 1109 code_newreg(&cs2, reg); 1110 cdb.gen(&cs2); // MOV reg,EA 1111 1112 cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7); 1113 if (reg & 8) 1114 cs.Irex |= REX_B; 1115 cdb.gen(&cs); // OP reg 1116 1117 cs2.Iop ^= 2; 1118 cdb.gen(&cs2); // MOV EA,reg 1119 1120 retregs = sregm; 1121 wantres = 0; 1122 if (e1.Ecount) 1123 cssave(e1,retregs,!OTleaf(e1.Eoper)); 1124 } 1125 else 1126 { 1127 cdb.gen(&cs); 1128 cs.Iflags &= ~opsize; 1129 cs.Iflags &= ~CFpsw; 1130 if (I16 && opsize) // if DWORD operand 1131 cs.IEV1.Voffset += 2; // compensate for wantres code 1132 } 1133 } 1134 else if (sz == 2 * REGSIZE) 1135 { 1136 targ_uns msw; 1137 1138 cs.Iop = 0x81; 1139 cs.Irm |= modregrm(0,mode,0); 1140 cs.Iflags |= cflags; 1141 cdb.gen(&cs); 1142 cs.Iflags &= ~CFpsw; 1143 1144 getlvalue_msw(&cs); // point to msw 1145 msw = cast(uint)MSREG(e.EV.E2.EV.Vllong); 1146 cs.IEV2.Vuns = msw; // msw of constant 1147 switch (op) 1148 { 1149 case OPminass: 1150 cs.Irm ^= modregrm(0,6,0); // SUB => SBB 1151 break; 1152 1153 case OPaddass: 1154 cs.Irm |= modregrm(0,2,0); // ADD => ADC 1155 break; 1156 1157 default: 1158 break; 1159 } 1160 cdb.gen(&cs); 1161 } 1162 else 1163 assert(0); 1164 freenode(e.EV.E2); // don't need it anymore 1165 } 1166 else if (isregvar(e1,&varregm,&varreg) && 1167 (e2.Eoper == OPvar || e2.Eoper == OPind) && 1168 !evalinregister(e2) && 1169 sz <= REGSIZE) // deal with later 1170 { 1171 getlvalue(cdb,&cs,e2,0); 1172 freenode(e2); 1173 getregs(cdb,varregm); 1174 code_newreg(&cs, varreg); 1175 if (I64 && sz == 1 && varreg >= 4) 1176 cs.Irex |= REX; 1177 cs.Iop = op1 ^ 2; // toggle direction bit 1178 if (forccs) 1179 cs.Iflags |= CFpsw; 1180 reverse = 2; // remember we toggled it 1181 cdb.gen(&cs); 1182 retregs = 0; // to trigger a bug if we attempt to use it 
1183 } 1184 else if ((op == OPaddass || op == OPminass) && 1185 sz <= REGSIZE && 1186 !e2.Ecount && 1187 ((jop = jmpopcode(e2)) == JC || jop == JNC || 1188 (OTconv(e2.Eoper) && !e2.EV.E1.Ecount && ((jop = jmpopcode(e2.EV.E1)) == JC || jop == JNC))) 1189 ) 1190 { 1191 /* e1 += (x < y) ADC EA,0 1192 * e1 -= (x < y) SBB EA,0 1193 * e1 += (x >= y) SBB EA,-1 1194 * e1 -= (x >= y) ADC EA,-1 1195 */ 1196 getlvalue(cdb,&cs,e1,0); // get lvalue 1197 modEA(cdb,&cs); 1198 regm_t keepmsk = idxregm(&cs); 1199 retregs = mPSW; 1200 if (OTconv(e2.Eoper)) 1201 { 1202 scodelem(cdb,e2.EV.E1,&retregs,keepmsk,true); 1203 freenode(e2); 1204 } 1205 else 1206 scodelem(cdb,e2,&retregs,keepmsk,true); 1207 cs.Iop = 0x81 ^ isbyte; // ADC EA,imm16/32 1208 uint regop = 2; // ADC 1209 if ((op == OPaddass) ^ (jop == JC)) 1210 regop = 3; // SBB 1211 code_newreg(&cs,regop); 1212 cs.Iflags |= opsize; 1213 if (forccs) 1214 cs.Iflags |= CFpsw; 1215 cs.IFL2 = FLconst; 1216 cs.IEV2.Vsize_t = (jop == JC) ? 0 : ~cast(targ_size_t)0; 1217 cdb.gen(&cs); 1218 retregs = 0; // to trigger a bug if we attempt to use it 1219 } 1220 else // evaluate e2 into register 1221 { 1222 retregs = (isbyte) ? 
BYTEREGS : ALLREGS; // pick working reg 1223 if (tyml == TYhptr) 1224 retregs &= ~mCX; // need CX for shift count 1225 scodelem(cdb,e.EV.E2,&retregs,0,true); // get rvalue 1226 getlvalue(cdb,&cs,e1,retregs); // get lvalue 1227 modEA(cdb,&cs); 1228 cs.Iop = op1; 1229 if (sz <= REGSIZE || tyfv(tyml)) 1230 { 1231 reg = findreg(retregs); 1232 code_newreg(&cs, reg); // OP1 EA,reg 1233 if (sz == 1 && reg >= 4 && I64) 1234 cs.Irex |= REX; 1235 if (forccs) 1236 cs.Iflags |= CFpsw; 1237 } 1238 else if (tyml == TYhptr) 1239 { 1240 uint mreg = findregmsw(retregs); 1241 uint lreg = findreglsw(retregs); 1242 getregs(cdb,retregs | mCX); 1243 1244 // If h -= l, convert to h += -l 1245 if (e.Eoper == OPminass) 1246 { 1247 cdb.gen2(0xF7,modregrm(3,3,mreg)); // NEG mreg 1248 cdb.gen2(0xF7,modregrm(3,3,lreg)); // NEG lreg 1249 code_orflag(cdb.last(),CFpsw); 1250 cdb.genc2(0x81,modregrm(3,3,mreg),0); // SBB mreg,0 1251 } 1252 cs.Iop = 0x01; 1253 cs.Irm |= modregrm(0,lreg,0); 1254 cdb.gen(&cs); // ADD EA,lreg 1255 code_orflag(cdb.last(),CFpsw); 1256 cdb.genc2(0x81,modregrm(3,2,mreg),0); // ADC mreg,0 1257 genshift(cdb); // MOV CX,offset __AHSHIFT 1258 cdb.gen2(0xD3,modregrm(3,4,mreg)); // SHL mreg,CL 1259 NEWREG(cs.Irm,mreg); // ADD EA+2,mreg 1260 getlvalue_msw(&cs); 1261 } 1262 else if (sz == 2 * REGSIZE) 1263 { 1264 cs.Irm |= modregrm(0,findreglsw(retregs),0); 1265 cdb.gen(&cs); // OP1 EA,reg+1 1266 code_orflag(cdb.last(),cflags); 1267 cs.Iop = op2; 1268 NEWREG(cs.Irm,findregmsw(retregs)); // OP2 EA+1,reg 1269 getlvalue_msw(&cs); 1270 } 1271 else 1272 assert(0); 1273 cdb.gen(&cs); 1274 retregs = 0; // to trigger a bug if we attempt to use it 1275 } 1276 1277 // See if we need to reload result into a register. 1278 // Need result in registers in case we have a 32 bit 1279 // result and we want the flags as a result. 
1280 if (wantres || (sz > REGSIZE && forccs)) 1281 { 1282 if (sz <= REGSIZE) 1283 { 1284 regm_t possregs; 1285 1286 possregs = ALLREGS; 1287 if (isbyte) 1288 possregs = BYTEREGS; 1289 retregs = forregs & possregs; 1290 if (!retregs) 1291 retregs = possregs; 1292 1293 // If reg field is destination 1294 if (cs.Iop & 2 && cs.Iop < 0x40 && (cs.Iop & 7) <= 5) 1295 { 1296 reg = (cs.Irm >> 3) & 7; 1297 if (cs.Irex & REX_R) 1298 reg |= 8; 1299 retregs = mask(reg); 1300 allocreg(cdb,&retregs,®,tyml); 1301 } 1302 // If lvalue is a register, just use that register 1303 else if ((cs.Irm & 0xC0) == 0xC0) 1304 { 1305 reg = cs.Irm & 7; 1306 if (cs.Irex & REX_B) 1307 reg |= 8; 1308 retregs = mask(reg); 1309 allocreg(cdb,&retregs,®,tyml); 1310 } 1311 else 1312 { 1313 allocreg(cdb,&retregs,®,tyml); 1314 cs.Iop = LOD ^ isbyte ^ reverse; 1315 code_newreg(&cs, reg); 1316 if (I64 && isbyte && reg >= 4) 1317 cs.Irex |= REX_W; 1318 cdb.gen(&cs); // MOV reg,EA 1319 } 1320 } 1321 else if (tyfv(tyml) || tyml == TYhptr) 1322 { 1323 regm_t idxregs; 1324 1325 if (tyml == TYhptr) 1326 getlvalue_lsw(&cs); 1327 idxregs = idxregm(&cs); 1328 retregs = forregs & ~idxregs; 1329 if (!(retregs & IDXREGS)) 1330 retregs |= IDXREGS & ~idxregs; 1331 if (!(retregs & mMSW)) 1332 retregs |= mMSW & ALLREGS; 1333 allocreg(cdb,&retregs,®,tyml); 1334 NEWREG(cs.Irm,findreglsw(retregs)); 1335 if (retregs & mES) // if want ES loaded 1336 { 1337 cs.Iop = 0xC4; 1338 cdb.gen(&cs); // LES lreg,EA 1339 } 1340 else 1341 { 1342 cs.Iop = LOD; 1343 cdb.gen(&cs); // MOV lreg,EA 1344 getlvalue_msw(&cs); 1345 if (I32) 1346 cs.Iflags |= CFopsize; 1347 NEWREG(cs.Irm,reg); 1348 cdb.gen(&cs); // MOV mreg,EA+2 1349 } 1350 } 1351 else if (sz == 2 * REGSIZE) 1352 { 1353 regm_t idx = idxregm(&cs); 1354 retregs = forregs; 1355 if (!retregs) 1356 retregs = ALLREGS; 1357 allocreg(cdb,&retregs,®,tyml); 1358 cs.Iop = LOD; 1359 NEWREG(cs.Irm,reg); 1360 1361 code csl = cs; 1362 NEWREG(csl.Irm,findreglsw(retregs)); 1363 getlvalue_lsw(&csl); 
// Load order: if the MSW destination register is also an index register of the EA, fetch the LSW first.
            if (mask(reg) & idx)
            {
                cdb.gen(&csl);             // MOV reg+1,EA
                cdb.gen(&cs);              // MOV reg,EA+2
            }
            else
            {
                cdb.gen(&cs);              // MOV reg,EA+2
                cdb.gen(&csl);             // MOV reg+1,EA
            }
        }
        else
            assert(0);
        if (e1.Ecount)                 // if we gen a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
    }
    freenode(e1);
    if (sz <= REGSIZE)
        *pretregs &= ~mPSW;            // flags are already set
    fixresult(cdb,e,retregs,pretregs);
}

/********************************
 * Generate code for *=
 *
 * Floating point operands are forwarded to xmmopass(), opass87() or
 * opassdbl(). Integer operands of up to REGSIZE bytes use, in order of
 * preference, an LEA sequence for selected constant factors, IMUL with an
 * immediate, two-operand IMUL, or one-operand [I]MUL through AX.
 * Operands of 2*REGSIZE bytes are multiplied as a register pair in DX:AX.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the *= expression; e.EV.E1 is the lvalue, e.EV.E2 the multiplier
 *      pretregs = mask of registers the result is wanted in; handed on to
 *                 opAssStoreReg()/opAssStorePair() which store the result
 */

void cdmulass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    regm_t retregs;
    reg_t resreg;
    uint opr,isbyte;

    //printf("cdmulass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;                     // OPxxxx

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    char uns = tyuns(tyml) || tyuns(e2.Ety);
    uint sz = _tysize[tyml];

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;          // 64 bit operands

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    if (tyfloating(tyml))
    {
        static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
        {
            opass87(cdb,e,pretregs);
        }
        else
        {
            opassdbl(cdb,e,pretregs,op);
        }
        return;
    }

    if (sz <= REGSIZE)                  // if word or byte
    {
        if (e2.Eoper == OPconst &&
            (I32 || I64) &&
            el_signx32(e2) &&
            sz >= 4)
        {
            // See if we can use an LEA instruction

            int ss;
            int ss2 = 0;
            int shift;

            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            switch (e2factor)
            {
                // L4 factors: (2^ss + 1), optionally times 2^ss2 or times 2
                case 12:    ss = 1; ss2 = 2; goto L4;
                case 24:    ss = 1; ss2 = 3; goto L4;

                case 6:
                case 3:     ss = 1; goto L4;

                case 20:    ss = 2; ss2 = 2; goto L4;
                case 40:    ss = 2; ss2 = 3; goto L4;

                case 10:
                case 5:     ss = 2; goto L4;

                case 36:    ss = 3; ss2 = 2; goto L4;
                case 72:    ss = 3; ss2 = 3; goto L4;

                case 18:
                case 9:     ss = 3; goto L4;
                L4:
                {
                    getlvalue(cdb,&cs,e1,0);           // get EA
                    modEA(cdb,&cs);
                    freenode(e2);
                    regm_t idxregs = idxregm(&cs);
                    regm_t regm = *pretregs & ~(idxregs | mBP | mR13);    // don't use EBP
                    if (!regm)
                        regm = allregs & ~(idxregs | mBP | mR13);
                    reg_t reg;
                    allocreg(cdb,&regm,&reg,tyml);
                    cs.Iop = LOD;
                    code_newreg(&cs,reg);
                    cs.Irex |= rex;
                    cdb.gen(&cs);                      // MOV reg,EA

                    assert((reg & 7) != BP);
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                modregxrmx(ss,reg,reg));  // LEA reg,[ss*reg][reg]
                    if (ss2)
                    {
                        cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                       modregxrm(ss2,reg,5));
                        cdb.last().IFL1 = FLconst;
                        cdb.last().IEV1.Vint = 0;               // LEA reg,0[ss2*reg]
                    }
                    else if (!(e2factor & 1))                // if even factor
                    {
                        genregs(cdb,0x03,reg,reg); // ADD reg,reg
                        code_orrex(cdb.last(),rex);
                    }
                    opAssStoreReg(cdb,cs,e,reg,pretregs);
                    return;
                }

                // L5 factors: 13 = 5 + 8 and 37 = 5 + 32, optionally times 2
                case 37:
                case 74:    shift = 2;
                            goto L5;
                case 13:
                case 26:    shift = 0;
                            goto L5;
                L5:
                {
                    getlvalue(cdb,&cs,e1,0);           // get EA
                    modEA(cdb,&cs);
                    freenode(e2);
                    regm_t idxregs = idxregm(&cs);
                    regm_t regm = *pretregs & ~(idxregs | mBP | mR13);    // don't use EBP
                    if (!regm)
                        regm = allregs & ~(idxregs | mBP | mR13);
                    reg_t reg;                          // return register
                    allocreg(cdb,&regm,&reg,tyml);

                    reg_t sreg = allocScratchReg(cdb, allregs & ~(regm | idxregs | mBP | mR13));

                    cs.Iop = LOD;
                    code_newreg(&cs,sreg);
                    cs.Irex |= rex;
                    cdb.gen(&cs);                                         // MOV sreg,EA

                    assert((sreg & 7) != BP);
                    assert((reg & 7) != BP);
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                          modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
                    if (shift)
                        cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                          modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
                    if (!(e2factor & 1))                                  // if even factor
                    {
                        genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
                        code_orrex(cdb.last(),rex);
                    }
                    opAssStoreReg(cdb,cs,e,reg,pretregs);
                    return;
                }

                default:
                    break;
            }
        }

        isbyte = (sz == 1);             // 1 for byte operation

        if (config.target_cpu >= TARGET_80286 &&
            e2.Eoper == OPconst && !isbyte)
        {
            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            // A 64 bit factor that does not fit the 32 bit immediate goes through a register
            if (I64 && sz == 8 && e2factor != cast(int)e2factor)
                goto L1;
            freenode(e2);
            getlvalue(cdb,&cs,e1,0);     // get EA
            regm_t idxregs = idxregm(&cs);
            retregs = *pretregs & (ALLREGS | mBP) & ~idxregs;
            if (!retregs)
                retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&resreg,tyml);
            cs.Iop = 0x69;                  // IMUL reg,EA,e2value
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = cast(int)e2factor;
            opr = resreg;
        }
        else if (!I16 && !isbyte)
        {
         L1:
            retregs = *pretregs & (ALLREGS | mBP);
            if (!retregs)
                retregs = ALLREGS;
            codelem(cdb,e2,&retregs,false); // load rvalue in reg
            getlvalue(cdb,&cs,e1,retregs);  // get EA
            getregs(cdb,retregs);           // destroy these regs
            cs.Iop = 0x0FAF;                        // IMUL resreg,EA
            resreg = findreg(retregs);
            opr = resreg;
        }
        else
        {
            retregs = mAX;
            codelem(cdb,e2,&retregs,false);         // load rvalue in AX
            getlvalue(cdb,&cs,e1,mAX);              // get EA
            getregs(cdb,isbyte ? mAX : mAX | mDX);  // destroy these regs
            cs.Iop = 0xF7 ^ isbyte;                 // [I]MUL EA
            opr = uns ? 4 : 5;                      // MUL/IMUL
            resreg = AX;                            // result register for *
        }
        code_newreg(&cs,opr);
        cdb.gen(&cs);

        opAssStoreReg(cdb, cs, e, resreg, pretregs);
        return;
    }
    else if (sz == 2 * REGSIZE)
    {
        if (e2.Eoper == OPconst && I32)
        {
            /*  if (msw)
                  IMUL    EDX,EDX,lsw
                  IMUL    reg,EAX,msw
                  ADD     reg,EDX
                else
                  IMUL    reg,EDX,lsw
                MOV       EDX,lsw
                MUL       EDX
                ADD       EDX,reg
             */
            freenode(e2);
            retregs = mDX|mAX;
            reg_t rhi, rlo;
            opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
            const regm_t keepmsk = idxregm(&cs);

            reg_t reg = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));
            const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));

            if (msw)
            {
                genmulimm(cdb,DX,DX,lsw);           // IMUL EDX,EDX,lsw
                genmulimm(cdb,reg,AX,msw);          // IMUL reg,EAX,msw
                cdb.gen2(0x03,modregrm(3,reg,DX));  // ADD  reg,EDX
            }
            else
                genmulimm(cdb,reg,DX,lsw);          // IMUL reg,EDX,lsw

            movregconst(cdb,DX,lsw,0);              // MOV EDX,lsw
            getregs(cdb,mDX);
            cdb.gen2(0xF7,modregrm(3,4,DX));        // MUL EDX
            cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD EDX,reg
        }
        else
        {
            retregs = mDX | mAX;
            regm_t rretregs = (config.target_cpu >= TARGET_PentiumPro) ? allregs & ~retregs : mCX | mBX;
            codelem(cdb,e2,&rretregs,false);
            getlvalue(cdb,&cs,e1,retregs | rretregs);
            getregs(cdb,retregs);
            cs.Iop = LOD;
            cdb.gen(&cs);                   // MOV AX,EA
            getlvalue_msw(&cs);
            cs.Irm |= modregrm(0,DX,0);
            cdb.gen(&cs);                   // MOV DX,EA+2
            getlvalue_lsw(&cs);
            if (config.target_cpu >= TARGET_PentiumPro)
            {
                regm_t rlo = findreglsw(rretregs);
                regm_t rhi = findregmsw(rretregs);
                /*  IMUL    rhi,EAX
                    IMUL    EDX,rlo
                    ADD     rhi,EDX
                    MUL     rlo
                    ADD     EDX,Erhi
                 */
                getregs(cdb,mAX|mDX|mask(rhi));
                cdb.gen2(0x0FAF,modregrm(3,rhi,AX));
                cdb.gen2(0x0FAF,modregrm(3,DX,rlo));
                cdb.gen2(0x03,modregrm(3,rhi,DX));
                cdb.gen2(0xF7,modregrm(3,4,rlo));
                cdb.gen2(0x03,modregrm(3,DX,rhi));
            }
            else
            {
                callclib(cdb,e,CLIB.lmul,&retregs,idxregm(&cs));
            }
        }

        opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
        return;
    }
    else
    {
        assert(0);
    }
}


/********************************
 * Generate code for /= %=
 *
 * Floating point operands are forwarded to xmmopass() (not for %=),
 * opass87() or opassdbl(). Integer operands of up to REGSIZE bytes are
 * handled, when optimizing for speed and the divisor is a constant, by
 * multiply-by-reciprocal or shift sequences; otherwise by DIV/IDIV.
 * Operands of 2*REGSIZE bytes use inline shift sequences for signed
 * division/modulo by a constant power of 2 (32 bit code only), otherwise
 * a runtime library call.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the /= or %= expression; e.EV.E1 is the lvalue, e.EV.E2 the divisor
 *      pretregs = mask of registers the result is wanted in; handed on to
 *                 opAssStoreReg()/opAssStorePair() which store the result
 */

void cddivass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    OPER op = e.Eoper;                     // OPxxxx

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    if (tyfloating(tyml))
    {
        static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
        {
            opass87(cdb,e,pretregs);
        }
        else
        {
            opassdbl(cdb,e,pretregs,op);
        }
        return;
    }

    code cs = void;

    //printf("cddivass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    char uns = tyuns(tyml) || tyuns(e2.Ety);
    uint sz = _tysize[tyml];

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;          // 64 bit operands

    if (sz <= REGSIZE)                  // if word or byte
    {
        uint isbyte = (sz == 1);        // 1 for byte operation
        reg_t resreg;
        targ_size_t e2factor;           // divisor, valid only if e2 is a constant
        targ_size_t d;                  // absolute value of divisor for signed divides
        bool neg;                       // true if signed divisor is negative
        int pow2;                       // ispow2(e2factor), valid only if e2 is a constant

        assert(!isbyte);                      // should never happen
        assert(I16 || sz != SHORTSIZE);

        if (e2.Eoper == OPconst)
        {
            e2factor = cast(targ_size_t)el_tolong(e2);
            pow2 = ispow2(e2factor);
            d = e2factor;
            if (!uns && cast(targ_llong)e2factor < 0)
            {
                neg = true;
                d = -d;
            }
        }

        // Signed divide by a constant
        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst &&
            !uns &&
            (d & (d - 1)) &&
            ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
        {
            /* R1 / 10
             *
             *  MOV     EAX,m
             *  IMUL    R1
             *  MOV     EAX,R1
             *  SAR     EAX,31
             *  SAR     EDX,shpost
             *  SUB     EDX,EAX
             *  IMUL    EAX,EDX,d
             *  SUB     R1,EAX
             *
             * EDX = quotient
             * R1 = remainder
             */
            assert(sz == 4 || sz == 8);

            ulong m;
            int shpost;
            const int N = sz * 8;
            const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);

            freenode(e2);

            getlvalue(cdb,&cs,e1,mAX | mDX);
            reg_t reg;
            opAssLoadReg(cdb, cs, e, reg, allregs & ~( mAX | mDX | idxregm(&cs)));    // MOV reg,EA
            getregs(cdb, mAX|mDX);

            /* Algorithm 5.2
             * if m>=2**(N-1)
             *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
             * else
             *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
             * if (neg)
             *    q = -q
             */
            const bool mgt = mhighbit || m >= (1UL << (N - 1));
            movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
            cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL reg
            if (mgt)
                cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,reg
            getregsNoSave(mAX);                                     // EAX no longer contains 'm'
            genmovreg(cdb, AX, reg);                                // MOV EAX,reg
            cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
            if (shpost)
                cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
            reg_t r3;
            if (neg && op == OPdivass)
            {
                cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
                r3 = AX;
            }
            else
            {
                cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
                r3 = DX;
            }

            // r3 is quotient
            reg_t resregx;
            switch (op)
            {
                case OPdivass:
                    resregx = r3;
                    break;

                case OPmodass:
                    assert(reg != AX && r3 == DX);
                    if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
                    {
                        cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
                    }
                    else
                    {
                        movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0);      // MOV EAX,d
                        cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));      // IMUL EAX,EDX
                        getregsNoSave(mAX);                              // EAX no longer contains 'd'
                    }
                    cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));           // SUB R1,EAX
                    resregx = reg;
                    break;

                default:
                    assert(0);
            }

            opAssStoreReg(cdb, cs, e, resregx, pretregs);
            return;
        }

        // Unsigned divide by a constant
        void unsignedDivideByConstant(ref CodeBuilder cdb)
        {
            assert(sz == 4 || sz == 8);

            reg_t r3;
            reg_t reg;
            ulong m;
            int shpre;
            int shpost;
            code cs = void;

            if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
            {
                /* t1 = MULUH(m, n)
                 * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
                 *   MOV   EAX,reg
                 *   MOV   EDX,m
                 *   MUL   EDX
                 *   MOV   EAX,reg
                 *   SUB   EAX,EDX
                 *   SHR   EAX,1
                 *   LEA   R3,[EAX][EDX]
                 *   SHR   R3,shpost-1
                 */
                assert(shpre == 0);

                freenode(e2);
                getlvalue(cdb,&cs,e1,mAX | mDX);
                regm_t idxregs = idxregm(&cs);
                opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs));   // MOV reg,EA
                getregs(cdb, mAX|mDX);

                genmovreg(cdb,AX,reg);                                // MOV EAX,reg
                movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
                getregs(cdb,mask(reg) | mDX | mAX);
                cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
                genmovreg(cdb,AX,reg);                                // MOV EAX,reg
                cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
                cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
                regm_t regm3 = allregs & ~idxregs;
                if (op == OPmodass)
                {
                    // reg (and possibly EAX) are still needed to compute the remainder
                    regm3 &= ~mask(reg);
                    if (!el_signx32(e2))
                        regm3 &= ~mAX;
                }
                allocreg(cdb,&regm3,&r3,TYint);
                cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
                if (shpost != 1)
                    cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
            }
            else
            {
                /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
                 *   SHR   EAX,shpre
                 *   MOV   reg,m
                 *   MUL   reg
                 *   SHR   EDX,shpost
                 */

                freenode(e2);
                getlvalue(cdb,&cs,e1,mAX | mDX);
                regm_t idxregs = idxregm(&cs);
                opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs));   // MOV reg,EA
                getregs(cdb, mAX|mDX);

                if (reg != AX)
                {
                    getregs(cdb,mAX);
                    genmovreg(cdb,AX,reg);                              // MOV EAX,reg
                }
                if (shpre)
                {
                    getregs(cdb,mAX);
                    cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
                }
                getregs(cdb,mDX);
                movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
                getregs(cdb,mDX | mAX);
                cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
                if (shpost)
                    cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
                r3 = DX;
            }

            reg_t resregx;
            switch (op)
            {
                case OPdivass:
                    // r3 = quotient
                    resregx = r3;
                    break;

                case OPmodass:
                    /* reg = original value
                     * r3  = quotient
                     */
                    assert(reg != AX);
                    if (el_signx32(e2))
                    {
                        cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
                    }
                    else
                    {
                        assert(!(mask(r3) & mAX));
                        movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
                        getregs(cdb,mAX);
                        cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
                    }
                    getregs(cdb,mask(reg));
                    cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
                    resregx = reg;
                    break;

                default:
                    assert(0);
            }

            opAssStoreReg(cdb, cs, e, resregx, pretregs);
            return;
        }

        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst &&
            uns &&
            e2factor > 2 && (e2factor & (e2factor - 1)) &&
            ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
        {
            unsignedDivideByConstant(cdb);
            return;
        }

        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst && !uns &&
            (sz == REGSIZE || (I64 && sz == 4)) &&
            pow2 != -1 &&
            e2factor == cast(int)e2factor &&
            !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass)
           )
        {
            freenode(e2);
            if (pow2 == 1 && op == OPdivass && config.target_cpu > TARGET_80386)
            {
                /* This is better than the code further down because it is
                 * not constrained to using AX and DX.
                 */
                getlvalue(cdb,&cs,e1,0);
                regm_t idxregs = idxregm(&cs);
                reg_t reg;
                opAssLoadReg(cdb,cs,e,reg,allregs & ~idxregs); // MOV reg,EA

                reg_t r = allocScratchReg(cdb, allregs & ~(idxregs | mask(reg)));
                genmovreg(cdb,r,reg);                        // MOV r,reg
                cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
                cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
                cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1

                opAssStoreReg(cdb, cs, e, reg, pretregs);
                return;
            }

            // Signed divide or modulo by power of 2
            getlvalue(cdb,&cs,e1,mAX | mDX);
            reg_t reg;
            opAssLoadReg(cdb,cs,e,reg,mAX);

            getregs(cdb,mDX);                   // DX is scratch register
            cdb.gen1(0x99);                     // CWD
            code_orrex(cdb.last(), rex);
            if (pow2 == 1)
            {
                if (op == OPdivass)
                {
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));      // SUB AX,DX
                    cdb.gen2(0xD1,grex | modregrm(3,7,AX));       // SAR AX,1
                    resreg = AX;
                }
                else // OPmod
                {
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));      // XOR AX,DX
                    cdb.genc2(0x81,grex | modregrm(3,4,AX),1);    // AND AX,1
                    cdb.gen2(0x03,grex | modregrm(3,DX,AX));      // ADD DX,AX
                    resreg = DX;
                }
            }
            else
            {
                assert(pow2 < 32);
                targ_ulong m = (1 << pow2) - 1;
                if (op == OPdivass)
                {
                    cdb.genc2(0x81,grex | modregrm(3,4,DX),m);    // AND DX,m
                    cdb.gen2(0x03,grex | modregrm(3,AX,DX));      // ADD AX,DX
                    // Be careful not to generate this for 8088
                    assert(config.target_cpu >= TARGET_80286);
                    cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2); // SAR AX,pow2
                    resreg = AX;
                }
                else // OPmodass
                {
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));      // XOR AX,DX
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));      // SUB AX,DX
                    cdb.genc2(0x81,grex | modregrm(3,4,AX),m);    // AND AX,m
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));      // XOR AX,DX
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));      // SUB AX,DX
                    resreg = AX;
                }
            }
        }
        else
        {
            regm_t retregs = ALLREGS & ~(mAX|mDX);     // DX gets sign extension
            codelem(cdb,e2,&retregs,false);            // load rvalue in retregs
            reg_t reg = findreg(retregs);
            getlvalue(cdb,&cs,e1,mAX | mDX | retregs); // get EA
            getregs(cdb,mAX | mDX);         // destroy these regs
            cs.Irm |= modregrm(0,AX,0);
            cs.Iop = LOD;
            cdb.gen(&cs);                   // MOV AX,EA
            if (uns)                        // if uint
                movregconst(cdb,DX,0,0);    // CLR DX
            else                            // else signed
            {
                cdb.gen1(0x99);             // CWD
                code_orrex(cdb.last(),rex);
            }
            getregs(cdb,mDX | mAX);         // DX and AX will be destroyed
            const uint opr = uns ? 6 : 7;   // DIV/IDIV
            genregs(cdb,0xF7,opr,reg);      // OPR reg
            code_orrex(cdb.last(),rex);
            resreg = (op == OPmodass) ? DX : AX;        // result register
        }
        opAssStoreReg(cdb, cs, e, resreg, pretregs);
        return;
    }

    assert(sz == 2 * REGSIZE);

    targ_size_t e2factor;
    int pow2;
    if (e2.Eoper == OPconst)
    {
        e2factor = cast(targ_size_t)el_tolong(e2);
        pow2 = ispow2(e2factor);
    }

    // Register pair signed divide by power of 2
    // (the divisor is e2; e itself is always the OPdivass node and never OPconst)
    if (op == OPdivass &&
        !uns &&
        e2.Eoper == OPconst &&
        pow2 != -1 &&
        I32 // not set up for I16 or I64 cent
       )
    {
        freenode(e2);
        regm_t retregs = mDX|mAX | mCX|mBX;     // LSW must be byte reg because of later SETZ
        reg_t rhi, rlo;
        opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
        const regm_t keepmsk = idxregm(&cs);
        retregs = mask(rhi) | mask(rlo);

        if (pow2 < 32)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
            if (pow2 == 1)
                cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1);  // SHR  r1,31
            else
            {
                cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);  // SAR  r1,31
                cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1);  // AND  r1,mask
            }
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
            cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
            cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
        }
        else if (pow2 == 32)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);      // SAR r1,31
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
            cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
            cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);     // SAR rhi,31
        }
        else if (pow2 < 63)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
            reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk | mask(r1)));

            genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);      // SAR r1,31
            cdb.genmovreg(r2,r1);                                         // MOV r2,r1

            if (pow2 == 33)
            {
                cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
                cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
                cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
            }
            else
            {
                cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
                cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
                cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
            }

            cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);     // SAR rhi,31
        }
        else
        {
            // This may be better done by cgelem.d
            assert(pow2 == 63);
            assert(mask(rlo) & BYTEREGS);                          // for SETZ
            // 0x81 /0 is ADD: rhi wraps to 0 only when it was 0x8000_0000,
            // so the following OR yields zero only for the most negative value
            cdb.genc2(0x81,grex | modregrmx(3,0,rhi),0x8000_0000); // ADD rhi,0x8000_0000
            cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
            cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
            cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
            movregconst(cdb,rhi,0,0);                              // MOV rhi,0
        }

        opAssStorePair(cdb, cs, e, rhi, rlo, pretregs);            // MSW first, then LSW
        return;
    }

    // Register pair signed modulo by power of 2
    // (the divisor is e2; e itself is always the OPmodass node and never OPconst)
    if (op == OPmodass &&
        !uns &&
        e2.Eoper == OPconst &&
        pow2 != -1 &&
        I32 // not set up for I64 cent yet
       )
    {
        freenode(e2);
        regm_t retregs = mDX|mAX;
        reg_t rhi, rlo;
        opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
        const regm_t keepmsk = idxregm(&cs);

        regm_t scratchm = allregs & ~(retregs | keepmsk);
        if (pow2 == 63)
            scratchm &= BYTEREGS;               // because of SETZ
        reg_t r1 = allocScratchReg(cdb, scratchm);

        if (pow2 < 32)
        {
            cdb.genmovreg(r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);      // SAR r1,31
            cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));                   // XOR rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                   // SUB rlo,r1
            cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);        // AND rlo,(1<<pow2)-1
            cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));                   // XOR rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                   // SUB rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                  // SBB rhi,rhi
        }
        else if (pow2 == 32)
        {
            cdb.genmovreg(r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);      // SAR r1,31
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                   // SUB rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                  // SBB rhi,rhi
        }
        else if (pow2 < 63)
        {
            // second scratch register: not the result pair, not an EA index register, not r1
            reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk | mask(r1)));

            cdb.genmovreg(r1,rhi);                                        // MOV  r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);      // SAR  r1,31
            cdb.genmovreg(r2,r1);                                         // MOV  r2,r1
            cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);           // SHRD r1,r2,64-pow2
            cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);             // SHR  r2,64-pow2
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
            cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                   // ADC  rhi,r2
            cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1);   // AND  rhi,(1<<(pow2-32))-1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                   // SUB  rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                   // SBB  rhi,r2
        }
        else
        {
            // This may be better done by cgelem.d
            assert(pow2 == 63);

            cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
            cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));                   // OR   r1,rlo
            cdb.gen2(0x0F94,modregrmx(3,0,r1));                           // SETZ r1
            cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1);      // SHL  r1,31
            cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));                   // SUB  rhi,r1
        }

        opAssStorePair(cdb, cs, e, rhi, rlo, pretregs);            // MSW first, then LSW
        return;
    }

    regm_t rretregs = mCX|mBX;
    codelem(cdb,e2,&rretregs,false);    // load e2 into CX|BX

    reg_t rlo;
    reg_t rhi;
    opAssLoadPair(cdb, cs, e, rhi, rlo, mDX|mAX, rretregs);

    regm_t retregs = (op == OPmodass) ? mCX|mBX : mDX|mAX;
    uint lib = uns ? CLIB.uldiv : CLIB.ldiv;
    if (op == OPmodass)
        ++lib;
    callclib(cdb,e,lib,&retregs,idxregm(&cs));

    opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
}


/********************************
 * Generate code for <<= and >>=
 *
 * When no register result is needed, or the operand is larger than
 * REGSIZE, the shift is performed directly on the EA; operands of
 * 2*REGSIZE bytes are shifted one bit at a time through the carry flag.
 * Otherwise the value is loaded into a register, shifted there, and
 * stored back with opAssStoreReg().
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the shift-assign expression; e.EV.E1 is the lvalue,
 *          e.EV.E2 the shift count (at most REGSIZE bytes)
 *      pretregs = mask of registers the result is wanted in; on the
 *                 shift-in-memory path it is overwritten with the
 *                 registers that actually hold the result
 */

void cdshass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    regm_t retregs;
    uint op1,op2;
    reg_t reg;

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    uint sz = _tysize[tyml];
    uint isbyte = tybyte(e.Ety) != 0;          // 1 for byte operations
    tym_t tym = tybasic(e.Ety);                // type of result
    OPER oper = e.Eoper;
    assert(tysize(e2.Ety) <= REGSIZE);

    uint rex = (I64 && sz == 8) ? REX_W : 0;

    // if our lvalue is a cse, make sure we evaluate for result in register
    if (e1.Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,&retregs,&reg))
        *pretregs |= ALLREGS;

    version (SCPP)
    {
        // Do this until the rest of the compiler does OPshr/OPashr correctly
        if (oper == OPshrass)
            oper = tyuns(tyml) ? OPshrass : OPashrass;
    }

    // Select opcodes. op2 is used for msw for long shifts.

    switch (oper)
    {
        case OPshlass:
            op1 = 4;                    // SHL
            op2 = 2;                    // RCL
            break;

        case OPshrass:
            op1 = 5;                    // SHR
            op2 = 3;                    // RCR
            break;

        case OPashrass:
            op1 = 7;                    // SAR
            op2 = 3;                    // RCR
            break;

        default:
            assert(0);
    }


    uint v = 0xD3;                      // for SHIFT xx,CL cases
    uint loopcnt = 1;
    uint conste2 = false;
    uint shiftcnt = 0;                  // avoid "use before initialized" warnings
    if (e2.Eoper == OPconst)
    {
        conste2 = true;                 // e2 is a constant
        shiftcnt = e2.EV.Vint;          // byte ordering of host
        if (config.target_cpu >= TARGET_80286 &&
            sz <= REGSIZE &&
            shiftcnt != 1)
            v = 0xC1;                   // SHIFT xx,shiftcnt
        else if (shiftcnt <= 3)
        {
            loopcnt = shiftcnt;
            v = 0xD1;                   // SHIFT xx,1
        }
    }

    if (v == 0xD3)                      // if COUNT == CL
    {
        retregs = mCX;
        codelem(cdb,e2,&retregs,false);
    }
    else
        freenode(e2);
    getlvalue(cdb,&cs,e1,mCX);          // get lvalue, preserve CX
    modEA(cdb,&cs);                     // check for modifying register

    if (*pretregs == 0 ||               // if don't return result
        (*pretregs == mPSW && conste2 && _tysize[tym] <= REGSIZE) ||
        sz > REGSIZE
       )
    {
        retregs = 0;            // value not returned in a register
        cs.Iop = v ^ isbyte;
        while (loopcnt--)
        {
            NEWREG(cs.Irm,op1);           // make sure op1 is first
            if (sz <= REGSIZE)
            {
                if (conste2)
                {
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vint = shiftcnt;
                }
                cdb.gen(&cs);             // SHIFT EA,[CL|1]
                if (*pretregs & mPSW && !loopcnt && conste2)
                    code_orflag(cdb.last(),CFpsw);
            }
            else // TYlong
            {
                cs.Iop = 0xD1;            // plain shift
                code *ce = gennop(null);                  // ce: NOP
                if (v == 0xD3)
                {
                    getregs(cdb,mCX);
                    if (!conste2)
                    {
                        assert(loopcnt == 0);
                        genjmp(cdb,JCXZ,FLcode,cast(block *) ce);   // JCXZ ce
                    }
                }
                code *cg;
                if (oper == OPshlass)
                {
                    cdb.gen(&cs);               // cg: SHIFT EA
                    cg = cdb.last();
                    code_orflag(cg,CFpsw);
                    getlvalue_msw(&cs);
                    NEWREG(cs.Irm,op2);
                    cdb.gen(&cs);               // SHIFT EA
                    getlvalue_lsw(&cs);
                }
                else
                {
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);
                    cg = cdb.last();
                    code_orflag(cg,CFpsw);
                    NEWREG(cs.Irm,op2);
                    getlvalue_lsw(&cs);
                    cdb.gen(&cs);
                }
                if (v == 0xD3)                    // if building a loop
                {
                    genjmp(cdb,LOOP,FLcode,cast(block *) cg); // LOOP cg
                    regimmed_set(CX,0);           // note that now CX == 0
                }
                cdb.append(ce);
            }
        }

        // If we want the result, we must load it from the EA
        // into a register.

        if (sz == 2 * REGSIZE && *pretregs)
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (retregs)
            {
                retregs &= ~idxregm(&cs);
                allocreg(cdb,&retregs,&reg,tym);
                cs.Iop = LOD;

                // be careful not to trash any index regs
                // do MSW first (which can't be an index reg)
                getlvalue_msw(&cs);
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
                reg = findreglsw(retregs);
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);
                if (*pretregs & mPSW)
                    tstresult(cdb,retregs,tyml,true);
            }
            else        // flags only
            {
                retregs = ALLREGS & ~idxregm(&cs);
                allocreg(cdb,&retregs,&reg,TYint);
                cs.Iop = LOD;
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);           // MOV reg,EA
                cs.Iop = 0x0B;          // OR reg,EA+2
                cs.Iflags |= CFpsw;
                getlvalue_msw(&cs);
                cdb.gen(&cs);
            }
        }
        if (e1.Ecount && !(retregs & regcon.mvar))   // if lvalue is a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
        freenode(e1);
        *pretregs = retregs;
        return;
    }
    else                                // else must evaluate in register
    {
        if (sz <= REGSIZE)
        {
            regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs);
            if (isbyte)
                possregs &= BYTEREGS;
            retregs = *pretregs & possregs;
            if (retregs == 0)
                retregs = possregs;
            allocreg(cdb,&retregs,&reg,tym);
            cs.Iop = LOD ^ isbyte;
            code_newreg(&cs, reg);
            if (isbyte && I64 && (reg >= 4))
                cs.Irex |= REX;
            cdb.gen(&cs);                     // MOV reg,EA
            if (!I16)
            {
                assert(!isbyte || (mask(reg) & BYTEREGS));
                cdb.genc2(v ^ isbyte,modregrmx(3,op1,reg),shiftcnt);
                if (isbyte && I64 && (reg >= 4))
                    cdb.last().Irex |= REX;
                code_orrex(cdb.last(), rex);
                // We can do a 32 bit shift on a 16 bit operand if
                // it's a left shift and we're not concerned about
                // the flags. Remember that flags are not set if
                // a shift of 0 occurs.
                if (_tysize[tym] == SHORTSIZE &&
                    (oper == OPshrass || oper == OPashrass ||
                     (*pretregs & mPSW && conste2)))
                     cdb.last().Iflags |= CFopsize;            // 16 bit operand
            }
            else
            {
                while (loopcnt--)
                {   // Generate shift instructions.
                    cdb.genc2(v ^ isbyte,modregrm(3,op1,reg),shiftcnt);
                }
            }
            if (*pretregs & mPSW && conste2)
            {
                assert(shiftcnt);
                *pretregs &= ~mPSW;     // result is already in flags
                code_orflag(cdb.last(),CFpsw);
            }

            opAssStoreReg(cdb,cs,e,reg,pretregs);
            return;
        }
        assert(0);
    }
}


/**********************************
 * Generate code for compares.
 * Handles lt,gt,le,ge,eqeq,ne for all data types.
 */

void cdcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs,rretregs;
    reg_t reg,rreg;
    int fl;

    //printf("cdcmp(e = %p, pretregs = %s)\n",e,regm_str(*pretregs));
    // Collect extra parameter. This is pretty ugly...
2522 int flag = cdcmp_flag; 2523 cdcmp_flag = 0; 2524 2525 elem *e1 = e.EV.E1; 2526 elem *e2 = e.EV.E2; 2527 if (*pretregs == 0) // if don't want result 2528 { 2529 codelem(cdb,e1,pretregs,false); 2530 *pretregs = 0; // in case e1 changed it 2531 codelem(cdb,e2,pretregs,false); 2532 return; 2533 } 2534 2535 uint jop = jmpopcode(e); // must be computed before 2536 // leaves are free'd 2537 uint reverse = 0; 2538 2539 OPER op = e.Eoper; 2540 assert(OTrel(op)); 2541 bool eqorne = (op == OPeqeq) || (op == OPne); 2542 2543 tym_t tym = tybasic(e1.Ety); 2544 uint sz = _tysize[tym]; 2545 uint isbyte = sz == 1; 2546 2547 uint rex = (I64 && sz == 8) ? REX_W : 0; 2548 uint grex = rex << 16; // 64 bit operands 2549 2550 code cs; 2551 code *ce; 2552 if (tyfloating(tym)) // if floating operation 2553 { 2554 if (config.fpxmmregs) 2555 { 2556 retregs = mPSW; 2557 if (tyxmmreg(tym)) 2558 orthxmm(cdb,e,&retregs); 2559 else 2560 orth87(cdb,e,&retregs); 2561 } 2562 else if (config.inline8087) 2563 { retregs = mPSW; 2564 orth87(cdb,e,&retregs); 2565 } 2566 else 2567 { 2568 static if (TARGET_WINDOS) 2569 { 2570 int clib; 2571 2572 retregs = 0; /* skip result for now */ 2573 if (iffalse(e2)) /* second operand is constant 0 */ 2574 { 2575 assert(!eqorne); /* should be OPbool or OPnot */ 2576 if (tym == TYfloat) 2577 { 2578 retregs = FLOATREGS; 2579 clib = CLIB.ftst0; 2580 } 2581 else 2582 { 2583 retregs = DOUBLEREGS; 2584 clib = CLIB.dtst0; 2585 } 2586 if (rel_exception(op)) 2587 clib += CLIB.dtst0exc - CLIB.dtst0; 2588 codelem(cdb,e1,&retregs,false); 2589 retregs = 0; 2590 callclib(cdb,e,clib,&retregs,0); 2591 freenode(e2); 2592 } 2593 else 2594 { 2595 clib = CLIB.dcmp; 2596 if (rel_exception(op)) 2597 clib += CLIB.dcmpexc - CLIB.dcmp; 2598 opdouble(cdb,e,&retregs,clib); 2599 } 2600 } 2601 else 2602 { 2603 assert(0); 2604 } 2605 } 2606 goto L3; 2607 } 2608 2609 /* If it's a signed comparison of longs, we have to call a library */ 2610 /* routine, because we don't know the target of the 
signed branch */ 2611 /* (have to set up flags so that jmpopcode() will do it right) */ 2612 if (!eqorne && 2613 (I16 && tym == TYlong && tybasic(e2.Ety) == TYlong || 2614 I32 && tym == TYllong && tybasic(e2.Ety) == TYllong) 2615 ) 2616 { 2617 assert(jop != JC && jop != JNC); 2618 retregs = mDX | mAX; 2619 codelem(cdb,e1,&retregs,false); 2620 retregs = mCX | mBX; 2621 scodelem(cdb,e2,&retregs,mDX | mAX,false); 2622 2623 if (I16) 2624 { 2625 retregs = 0; 2626 callclib(cdb,e,CLIB.lcmp,&retregs,0); // gross, but it works 2627 } 2628 else 2629 { 2630 /* Generate: 2631 * CMP EDX,ECX 2632 * JNE C1 2633 * XOR EDX,EDX 2634 * CMP EAX,EBX 2635 * JZ C1 2636 * JA C3 2637 * DEC EDX 2638 * JMP C1 2639 * C3: INC EDX 2640 * C1: 2641 */ 2642 getregs(cdb,mDX); 2643 genregs(cdb,0x39,CX,DX); // CMP EDX,ECX 2644 code *c1 = gennop(null); 2645 genjmp(cdb,JNE,FLcode,cast(block *)c1); // JNE C1 2646 movregconst(cdb,DX,0,0); // XOR EDX,EDX 2647 genregs(cdb,0x39,BX,AX); // CMP EAX,EBX 2648 genjmp(cdb,JE,FLcode,cast(block *)c1); // JZ C1 2649 code *c3 = gen1(null,0x40 + DX); // INC EDX 2650 genjmp(cdb,JA,FLcode,cast(block *)c3); // JA C3 2651 cdb.gen1(0x48 + DX); // DEC EDX 2652 genjmp(cdb,JMPS,FLcode,cast(block *)c1); // JMP C1 2653 cdb.append(c3); 2654 cdb.append(c1); 2655 getregs(cdb,mDX); 2656 retregs = mPSW; 2657 } 2658 goto L3; 2659 } 2660 2661 /* See if we should reverse the comparison, so a JA => JC, and JBE => JNC 2662 * (This is already reflected in the jop) 2663 */ 2664 if ((jop == JC || jop == JNC) && 2665 (op == OPgt || op == OPle) && 2666 (tyuns(tym) || tyuns(e2.Ety)) 2667 ) 2668 { // jmpopcode() sez comparison should be reversed 2669 assert(e2.Eoper != OPconst && e2.Eoper != OPrelconst); 2670 reverse ^= 2; 2671 } 2672 2673 /* See if we should swap operands */ 2674 if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2)) 2675 { 2676 e1 = e.EV.E2; 2677 e2 = e.EV.E1; 2678 reverse ^= 2; 2679 } 2680 2681 retregs = allregs; 2682 if (isbyte) 2683 retregs = BYTEREGS; 2684 2685 
ce = null; 2686 cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0; 2687 cs.Irex = cast(ubyte)rex; 2688 if (sz > REGSIZE) 2689 ce = gennop(ce); 2690 2691 switch (e2.Eoper) 2692 { 2693 default: 2694 L2: 2695 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 2696 rretregs = allregs & ~retregs; 2697 if (isbyte) 2698 rretregs &= BYTEREGS; 2699 scodelem(cdb,e2,&rretregs,retregs,true); // get right leaf 2700 if (sz <= REGSIZE) // CMP reg,rreg 2701 { 2702 reg = findreg(retregs); // get reg that e1 is in 2703 rreg = findreg(rretregs); 2704 genregs(cdb,0x3B ^ isbyte ^ reverse,reg,rreg); 2705 code_orrex(cdb.last(), rex); 2706 if (!I16 && sz == SHORTSIZE) 2707 cdb.last().Iflags |= CFopsize; // compare only 16 bits 2708 if (I64 && isbyte && (reg >= 4 || rreg >= 4)) 2709 cdb.last().Irex |= REX; // address byte registers 2710 } 2711 else 2712 { 2713 assert(sz <= 2 * REGSIZE); 2714 2715 // Compare MSW, if they're equal then compare the LSW 2716 reg = findregmsw(retregs); 2717 rreg = findregmsw(rretregs); 2718 genregs(cdb,0x3B ^ reverse,reg,rreg); // CMP reg,rreg 2719 if (I32 && sz == 6) 2720 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2721 else if (I64) 2722 code_orrex(cdb.last(), REX_W); 2723 genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop 2724 2725 reg = findreglsw(retregs); 2726 rreg = findreglsw(rretregs); 2727 genregs(cdb,0x3B ^ reverse,reg,rreg); // CMP reg,rreg 2728 if (I64) 2729 code_orrex(cdb.last(), REX_W); 2730 } 2731 break; 2732 2733 case OPrelconst: 2734 if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) 2735 goto L2; 2736 fl = el_fl(e2); 2737 switch (fl) 2738 { 2739 case FLfunc: 2740 fl = FLextern; // so it won't be self-relative 2741 break; 2742 2743 case FLdata: 2744 case FLudata: 2745 case FLextern: 2746 if (sz > REGSIZE) // compare against DS, not DGROUP 2747 goto L2; 2748 break; 2749 2750 case FLfardata: 2751 break; 2752 2753 default: 2754 goto L2; 2755 } 2756 cs.IFL2 = cast(ubyte)fl; 2757 cs.IEV2.Vsym = e2.EV.Vsym; 2758 if (sz 
> REGSIZE) 2759 { 2760 cs.Iflags |= CFseg; 2761 cs.IEV2.Voffset = 0; 2762 } 2763 else 2764 { 2765 cs.Iflags |= CFoff; 2766 cs.IEV2.Voffset = e2.EV.Voffset; 2767 } 2768 goto L4; 2769 2770 case OPconst: 2771 // If compare against 0 2772 if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) && 2773 isregvar(e1,&retregs,®) 2774 ) 2775 { // Just do a TEST instruction 2776 genregs(cdb,0x85 ^ isbyte,reg,reg); // TEST reg,reg 2777 cdb.last().Iflags |= (cs.Iflags & CFopsize) | CFpsw; 2778 code_orrex(cdb.last(), rex); 2779 if (I64 && isbyte && reg >= 4) 2780 cdb.last().Irex |= REX; // address byte registers 2781 retregs = mPSW; 2782 break; 2783 } 2784 2785 if (!tyuns(tym) && !tyuns(e2.Ety) && 2786 !boolres(e2) && !(*pretregs & mPSW) && 2787 (sz == REGSIZE || (I64 && sz == 4)) && 2788 (!I16 || op == OPlt || op == OPge)) 2789 { 2790 assert(*pretregs & (allregs)); 2791 codelem(cdb,e1,pretregs,false); 2792 reg = findreg(*pretregs); 2793 getregs(cdb,mask(reg)); 2794 switch (op) 2795 { 2796 case OPle: 2797 cdb.genc2(0x81,grex | modregrmx(3,0,reg),cast(uint)-1); // ADD reg,-1 2798 code_orflag(cdb.last(), CFpsw); 2799 cdb.genc2(0x81,grex | modregrmx(3,2,reg),0); // ADC reg,0 2800 goto oplt; 2801 2802 case OPgt: 2803 cdb.gen2(0xF7,grex | modregrmx(3,3,reg)); // NEG reg 2804 /* Flips the sign bit unless the value is 0 or int.min. 2805 Also sets the carry bit when the value is not 0. */ 2806 code_orflag(cdb.last(), CFpsw); 2807 cdb.genc2(0x81,grex | modregrmx(3,3,reg),0); // SBB reg,0 2808 /* Subtracts the carry bit. This turns int.min into 2809 int.max, flipping the sign bit. 2810 For other negative and positive values, subtracting 1 2811 doesn't affect the sign bit. 2812 For 0, the carry bit is not set, so this does nothing 2813 and the sign bit is not affected. */ 2814 goto oplt; 2815 2816 case OPlt: 2817 oplt: 2818 // Get the sign bit, i.e. 1 if the value is negative. 
2819 if (!I16) 2820 cdb.genc2(0xC1,grex | modregrmx(3,5,reg),sz * 8 - 1); // SHR reg,31 2821 else 2822 { /* 8088-286 do not have a barrel shifter, so use this 2823 faster sequence 2824 */ 2825 genregs(cdb,0xD1,0,reg); // ROL reg,1 2826 reg_t regi; 2827 if (reghasvalue(allregs,1,®i)) 2828 genregs(cdb,0x23,reg,regi); // AND reg,regi 2829 else 2830 cdb.genc2(0x81,modregrm(3,4,reg),1); // AND reg,1 2831 } 2832 break; 2833 2834 case OPge: 2835 genregs(cdb,0xD1,4,reg); // SHL reg,1 2836 code_orrex(cdb.last(),rex); 2837 code_orflag(cdb.last(), CFpsw); 2838 genregs(cdb,0x19,reg,reg); // SBB reg,reg 2839 code_orrex(cdb.last(),rex); 2840 if (I64) 2841 { 2842 cdb.gen2(0xFF,modregrmx(3,0,reg)); // INC reg 2843 code_orrex(cdb.last(), rex); 2844 } 2845 else 2846 cdb.gen1(0x40 + reg); // INC reg 2847 break; 2848 2849 default: 2850 assert(0); 2851 } 2852 freenode(e2); 2853 goto ret; 2854 } 2855 2856 cs.IFL2 = FLconst; 2857 if (sz == 16) 2858 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vcent.msw; 2859 else if (sz > REGSIZE) 2860 cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong); 2861 else 2862 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vllong; 2863 2864 // The cmp immediate relies on sign extension of the 32 bit immediate value 2865 if (I64 && sz >= REGSIZE && cs.IEV2.Vsize_t != cast(int)cs.IEV2.Vint) 2866 goto L2; 2867 L4: 2868 cs.Iop = 0x81 ^ isbyte; 2869 2870 /* if ((e1 is data or a '*' reference) and it's not a 2871 * common subexpression 2872 */ 2873 2874 if ((e1.Eoper == OPvar && datafl[el_fl(e1)] || 2875 e1.Eoper == OPind) && 2876 !evalinregister(e1)) 2877 { 2878 getlvalue(cdb,&cs,e1,RMload); 2879 freenode(e1); 2880 if (evalinregister(e2)) 2881 { 2882 retregs = idxregm(&cs); 2883 if ((cs.Iflags & CFSEG) == CFes) 2884 retregs |= mES; // take no chances 2885 rretregs = allregs & ~retregs; 2886 if (isbyte) 2887 rretregs &= BYTEREGS; 2888 scodelem(cdb,e2,&rretregs,retregs,true); 2889 cs.Iop = 0x39 ^ isbyte ^ reverse; 2890 if (sz > REGSIZE) 2891 { 2892 rreg = findregmsw(rretregs); 2893 
cs.Irm |= modregrm(0,rreg,0); 2894 getlvalue_msw(&cs); 2895 cdb.gen(&cs); // CMP EA+2,rreg 2896 if (I32 && sz == 6) 2897 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2898 if (I64 && isbyte && rreg >= 4) 2899 cdb.last().Irex |= REX; 2900 genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop 2901 rreg = findreglsw(rretregs); 2902 NEWREG(cs.Irm,rreg); 2903 getlvalue_lsw(&cs); 2904 } 2905 else 2906 { 2907 rreg = findreg(rretregs); 2908 code_newreg(&cs, rreg); 2909 if (I64 && isbyte && rreg >= 4) 2910 cs.Irex |= REX; 2911 } 2912 } 2913 else 2914 { 2915 cs.Irm |= modregrm(0,7,0); 2916 if (sz > REGSIZE) 2917 { 2918 if (sz == 6) 2919 assert(0); 2920 if (e2.Eoper == OPrelconst) 2921 { cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg; 2922 cs.IEV2.Voffset = 0; 2923 } 2924 getlvalue_msw(&cs); 2925 cdb.gen(&cs); // CMP EA+2,const 2926 if (!I16 && sz == 6) 2927 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2928 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE nop 2929 if (e2.Eoper == OPconst) 2930 cs.IEV2.Vint = cast(int)e2.EV.Vllong; 2931 else if (e2.Eoper == OPrelconst) 2932 { // Turn off CFseg, on CFoff 2933 cs.Iflags ^= CFseg | CFoff; 2934 cs.IEV2.Voffset = e2.EV.Voffset; 2935 } 2936 else 2937 assert(0); 2938 getlvalue_lsw(&cs); 2939 } 2940 freenode(e2); 2941 } 2942 cdb.gen(&cs); 2943 break; 2944 } 2945 2946 if (evalinregister(e2) && !OTassign(e1.Eoper) && 2947 !isregvar(e1,null,null)) 2948 { 2949 regm_t m; 2950 2951 m = allregs & ~regcon.mvar; 2952 if (isbyte) 2953 m &= BYTEREGS; 2954 if (m & (m - 1)) // if more than one free register 2955 goto L2; 2956 } 2957 if ((e1.Eoper == OPstrcmp || (OTassign(e1.Eoper) && sz <= REGSIZE)) && 2958 !boolres(e2) && !evalinregister(e1)) 2959 { 2960 retregs = mPSW; 2961 scodelem(cdb,e1,&retregs,0,false); 2962 freenode(e2); 2963 break; 2964 } 2965 if (sz <= REGSIZE && !boolres(e2) && e1.Eoper == OPadd && *pretregs == mPSW) 2966 { 2967 retregs |= mPSW; 2968 scodelem(cdb,e1,&retregs,0,false); 2969 freenode(e2); 2970 break; 
2971 } 2972 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 2973 if (sz == 1) 2974 { 2975 reg = findreg(retregs & allregs); // get reg that e1 is in 2976 cs.Irm = modregrm(3,7,reg & 7); 2977 if (reg & 8) 2978 cs.Irex |= REX_B; 2979 if (e1.Eoper == OPvar && e1.EV.Voffset == 1 && e1.EV.Vsym.Sfl == FLreg) 2980 { assert(reg < 4); 2981 cs.Irm |= 4; // use upper register half 2982 } 2983 if (I64 && reg >= 4) 2984 cs.Irex |= REX; // address byte registers 2985 } 2986 else if (sz <= REGSIZE) 2987 { // CMP reg,const 2988 reg = findreg(retregs & allregs); // get reg that e1 is in 2989 rretregs = allregs & ~retregs; 2990 if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,&rreg)) 2991 { 2992 genregs(cdb,0x3B,reg,rreg); 2993 code_orrex(cdb.last(), rex); 2994 if (!I16) 2995 cdb.last().Iflags |= cs.Iflags & CFopsize; 2996 freenode(e2); 2997 break; 2998 } 2999 cs.Irm = modregrm(3,7,reg & 7); 3000 if (reg & 8) 3001 cs.Irex |= REX_B; 3002 } 3003 else if (sz <= 2 * REGSIZE) 3004 { 3005 reg = findregmsw(retregs); // get reg that e1 is in 3006 cs.Irm = modregrm(3,7,reg); 3007 cdb.gen(&cs); // CMP reg,MSW 3008 if (I32 && sz == 6) 3009 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 3010 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE ce 3011 3012 reg = findreglsw(retregs); 3013 cs.Irm = modregrm(3,7,reg); 3014 if (e2.Eoper == OPconst) 3015 cs.IEV2.Vint = e2.EV.Vlong; 3016 else if (e2.Eoper == OPrelconst) 3017 { // Turn off CFseg, on CFoff 3018 cs.Iflags ^= CFseg | CFoff; 3019 cs.IEV2.Voffset = e2.EV.Voffset; 3020 } 3021 else 3022 assert(0); 3023 } 3024 else 3025 assert(0); 3026 cdb.gen(&cs); // CMP sucreg,LSW 3027 freenode(e2); 3028 break; 3029 3030 case OPind: 3031 if (e2.Ecount) 3032 goto L2; 3033 goto L5; 3034 3035 case OPvar: 3036 static if (TARGET_OSX) 3037 { 3038 if (movOnly(e2)) 3039 goto L2; 3040 } 3041 if ((e1.Eoper == OPvar && 3042 isregvar(e2,&rretregs,®) && 3043 sz <= REGSIZE 3044 ) || 3045 (e1.Eoper == OPind && 3046 isregvar(e2,&rretregs,®) && 3047 
!evalinregister(e1) && 3048 sz <= REGSIZE 3049 ) 3050 ) 3051 { 3052 // CMP EA,e2 3053 getlvalue(cdb,&cs,e1,RMload); 3054 freenode(e1); 3055 cs.Iop = 0x39 ^ isbyte ^ reverse; 3056 code_newreg(&cs,reg); 3057 if (I64 && isbyte && reg >= 4) 3058 cs.Irex |= REX; // address byte registers 3059 cdb.gen(&cs); 3060 freenode(e2); 3061 break; 3062 } 3063 L5: 3064 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 3065 if (sz <= REGSIZE) // CMP reg,EA 3066 { 3067 reg = findreg(retregs & allregs); // get reg that e1 is in 3068 uint opsize = cs.Iflags & CFopsize; 3069 loadea(cdb,e2,&cs,0x3B ^ isbyte ^ reverse,reg,0,RMload | retregs,0); 3070 code_orflag(cdb.last(),opsize); 3071 } 3072 else if (sz <= 2 * REGSIZE) 3073 { 3074 reg = findregmsw(retregs); // get reg that e1 is in 3075 // CMP reg,EA 3076 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0); 3077 if (I32 && sz == 6) 3078 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 3079 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE ce 3080 reg = findreglsw(retregs); 3081 if (e2.Eoper == OPind) 3082 { 3083 NEWREG(cs.Irm,reg); 3084 getlvalue_lsw(&cs); 3085 cdb.gen(&cs); 3086 } 3087 else 3088 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0); 3089 } 3090 else 3091 assert(0); 3092 freenode(e2); 3093 break; 3094 } 3095 cdb.append(ce); 3096 3097 L3: 3098 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register 3099 { 3100 if (config.target_cpu >= TARGET_80386 && !flag && !(jop & 0xFF00)) 3101 { 3102 regm_t resregs = retregs; 3103 if (!I64) 3104 { 3105 resregs &= BYTEREGS; 3106 if (!resregs) 3107 resregs = BYTEREGS; 3108 } 3109 allocreg(cdb,&resregs,®,TYint); 3110 cdb.gen2(0x0F90 + (jop & 0x0F),modregrmx(3,0,reg)); // SETcc reg 3111 if (I64 && reg >= 4) 3112 code_orrex(cdb.last(),REX); 3113 if (tysize(e.Ety) > 1) 3114 { 3115 genregs(cdb,MOVZXb,reg,reg); // MOVZX reg,reg 3116 if (I64 && sz == 8) 3117 code_orrex(cdb.last(),REX_W); 3118 if (I64 && reg >= 4) 3119 
code_orrex(cdb.last(),REX); 3120 } 3121 *pretregs &= ~mPSW; 3122 fixresult(cdb,e,resregs,pretregs); 3123 } 3124 else 3125 { 3126 code *nop = null; 3127 regm_t save = regcon.immed.mval; 3128 allocreg(cdb,&retregs,®,TYint); 3129 regcon.immed.mval = save; 3130 if ((*pretregs & mPSW) == 0 && 3131 (jop == JC || jop == JNC)) 3132 { 3133 getregs(cdb,retregs); 3134 genregs(cdb,0x19,reg,reg); // SBB reg,reg 3135 if (rex || flag & REX_W) 3136 code_orrex(cdb.last(), REX_W); 3137 if (flag) 3138 { } // cdcond() will handle it 3139 else if (jop == JNC) 3140 { 3141 if (I64) 3142 { 3143 cdb.gen2(0xFF,modregrmx(3,0,reg)); // INC reg 3144 code_orrex(cdb.last(), rex); 3145 } 3146 else 3147 cdb.gen1(0x40 + reg); // INC reg 3148 } 3149 else 3150 { 3151 cdb.gen2(0xF7,modregrmx(3,3,reg)); // NEG reg 3152 code_orrex(cdb.last(), rex); 3153 } 3154 } 3155 else if (I64 && sz == 8) 3156 { 3157 assert(!flag); 3158 movregconst(cdb,reg,1,64|8); // MOV reg,1 3159 nop = gennop(nop); 3160 genjmp(cdb,jop,FLcode,cast(block *) nop); // Jtrue nop 3161 // MOV reg,0 3162 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 64|8 : 64); 3163 regcon.immed.mval &= ~mask(reg); 3164 } 3165 else 3166 { 3167 assert(!flag); 3168 movregconst(cdb,reg,1,8); // MOV reg,1 3169 nop = gennop(nop); 3170 genjmp(cdb,jop,FLcode,cast(block *) nop); // Jtrue nop 3171 // MOV reg,0 3172 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 8 : 0); 3173 regcon.immed.mval &= ~mask(reg); 3174 } 3175 *pretregs = retregs; 3176 cdb.append(nop); 3177 } 3178 } 3179 ret: 3180 { } 3181 } 3182 3183 3184 /********************************** 3185 * Generate code for signed compare of longs. 
 * Input:
 *      targ    block* or code*
 * Params:
 *      cdb = code builder the compares and jumps are appended to
 *      e = relational elem whose operands are compared; it is released with
 *          freenode() before returning
 *      jcond = when bit 0 is clear, the generated jump conditions are toggled
 *      fltarg = FL kind of targ, passed through to genjmp()
 *      targ = jump target, passed to genjmp() cast to block*
 */

void longcmp(ref CodeBuilder cdb,elem *e,bool jcond,uint fltarg,code *targ)
{
    // Jump opcodes indexed by (op - OPle); the column order is shown below.
                                         // <=  >   <   >=
    static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ];
    static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ];

    //printf("longcmp(e = %p)\n", e);
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;

    // See if we should swap operands
    if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
    {
        e1 = e.EV.E2;
        e2 = e.EV.E1;
        op = swaprel(op);
    }

    code cs;
    cs.Iflags = 0;
    cs.Irex = 0;

    code *ce = gennop(null);            // NOP appended at the very end as the fall-out target
    regm_t retregs = ALLREGS;
    regm_t rretregs;
    reg_t reg,rreg;

    // Build the pair of jumps that follows the MSW compare in a side
    // builder. Each case below appends it right after that compare.
    uint jop = jopmsw[op - OPle];
    if (!(jcond & 1)) jop ^= (JL ^ JG);                   // toggle jump condition
    CodeBuilder cdbjmp;
    cdbjmp.ctor();
    genjmp(cdbjmp,jop,fltarg, cast(block *) targ);             // Jx targ
    genjmp(cdbjmp,jop ^ (JL ^ JG),FLcode, cast(block *) ce);   // Jy nop

    switch (e2.Eoper)
    {
        default:
        L2:
            scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
            rretregs = ALLREGS & ~retregs;
            scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
            cse_flush(cdb,1);
            // Compare MSW, if they're equal then compare the LSW
            reg = findregmsw(retregs);
            rreg = findregmsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);            // CMP reg,rreg
            cdb.append(cdbjmp);

            reg = findreglsw(retregs);
            rreg = findreglsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);            // CMP reg,rreg
            break;

        case OPconst:
            cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);            // MSW first
            cs.IFL2 = FLconst;
            cs.Iop = 0x81;

            /* if ((e1 is data or a '*' reference) and it's not a
             * common subexpression
             */

            if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
                 e1.Eoper == OPind) &&
                !evalinregister(e1))
            {
                getlvalue(cdb,&cs,e1,0);
                freenode(e1);
                if (evalinregister(e2))
                {
                    retregs = idxregm(&cs);
                    if ((cs.Iflags & CFSEG) == CFes)
                        retregs |= mES;             // take no chances
                    rretregs = ALLREGS & ~retregs;
                    scodelem(cdb,e2,&rretregs,retregs,true);
                    cse_flush(cdb,1);
                    rreg = findregmsw(rretregs);
                    cs.Iop = 0x39;
                    cs.Irm |= modregrm(0,rreg,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);                 // CMP EA+2,rreg
                    cdb.append(cdbjmp);
                    rreg = findreglsw(rretregs);
                    NEWREG(cs.Irm,rreg);
                }
                else
                {
                    cse_flush(cdb,1);
                    cs.Irm |= modregrm(0,7,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);                 // CMP EA+2,const
                    cdb.append(cdbjmp);
                    cs.IEV2.Vint = e2.EV.Vlong;
                    freenode(e2);
                }
                getlvalue_lsw(&cs);
                cdb.gen(&cs);                     // CMP EA,rreg/const
                break;
            }
            if (evalinregister(e2))
                goto L2;

            scodelem(cdb,e1,&retregs,0,true);    // compute left leaf
            cse_flush(cdb,1);
            reg = findregmsw(retregs);              // get reg that e1 is in
            cs.Irm = modregrm(3,7,reg);

            cdb.gen(&cs);                         // CMP reg,MSW
            cdb.append(cdbjmp);
            reg = findreglsw(retregs);
            cs.Irm = modregrm(3,7,reg);
            cs.IEV2.Vint = e2.EV.Vlong;
            cdb.gen(&cs);                         // CMP sucreg,LSW
            freenode(e2);
            break;

        case OPvar:
            if (!e1.Ecount && e1.Eoper == OPs32_64)
            {
                // e1 is a sign extension: evaluate its operand and build
                // the MSW with MOV + SAR.
                reg_t msreg;

                retregs = allregs;
                scodelem(cdb,e1.EV.E1,&retregs,0,true);
                freenode(e1);
                reg = findreg(retregs);
                retregs = allregs & ~retregs;
                allocreg(cdb,&retregs,&msreg,TYint);
                genmovreg(cdb,msreg,reg);                  // MOV msreg,reg
                cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
                cse_flush(cdb,1);
                loadea(cdb,e2,&cs,0x3B,msreg,REGSIZE,mask(reg),0);
                cdb.append(cdbjmp);
                loadea(cdb,e2,&cs,0x3B,reg,0,mask(reg),0);
                freenode(e2);
            }
            else
            {
                scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
                cse_flush(cdb,1);
                reg = findregmsw(retregs);   // get reg that e1 is in
                loadea(cdb,e2,&cs,0x3B,reg,REGSIZE,retregs,0);
                cdb.append(cdbjmp);
                reg = findreglsw(retregs);
                loadea(cdb,e2,&cs,0x3B,reg,0,retregs,0);
                freenode(e2);
            }
            break;
    }

    jop = joplsw[op - OPle];
    if (!(jcond & 1)) jop ^= 1;                           // toggle jump condition
    genjmp(cdb,jop,fltarg,cast(block *) targ);   // Jcond targ

    cdb.append(ce);
    freenode(e);
}

/*****************************
 * Do conversions.
 * Depends on OPd_s32 and CLIB.dbllng being in sequence.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = conversion elem; the operand is e.EV.E1
 *      pretregs = requested result registers; if 0, only the operand is
 *                 code-generated and no conversion is emitted
 */

void cdcnvt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdcnvt: %p *pretregs = %s\n", e, regm_str(*pretregs));
    //elem_print(e);

    // Maps a conversion operator to the runtime library routine that does it.
    // NOTE(review): the lookup at the end of this function is a linear search
    // of this table, so the "in sequence" remark in the header looks stale.
    // Confirm before relying on it.
    static immutable ubyte[2][16] clib =
    [
        [ OPd_s32,        CLIB.dbllng   ],
        [ OPs32_d,        CLIB.lngdbl   ],
        [ OPd_s16,        CLIB.dblint   ],
        [ OPs16_d,        CLIB.intdbl   ],
        [ OPd_u16,        CLIB.dbluns   ],
        [ OPu16_d,        CLIB.unsdbl   ],
        [ OPd_u32,        CLIB.dblulng  ],
        [ OPu32_d,        CLIB.ulngdbl  ],
        [ OPd_s64,        CLIB.dblllng  ],
        [ OPs64_d,        CLIB.llngdbl  ],
        [ OPd_u64,        CLIB.dblullng ],
        [ OPu64_d,        CLIB.ullngdbl ],
        [ OPd_f,          CLIB.dblflt   ],
        [ OPf_d,          CLIB.fltdbl   ],
        [ OPvp_fp,        CLIB.vptrfptr ],
        [ OPcvp_fp,       CLIB.cvptrfptr]
    ];

    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    regm_t retregs;
    if (config.inline8087)
    {
        switch (e.Eoper)
        {
            case OPld_d:
            case OPd_ld:
            {
                if (tycomplex(e.EV.E1.Ety))
                {
            Lcomplex:
                    regm_t retregsx = mST01 | (*pretregs & mPSW);
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    fixresult_complex87(cdb, e, retregsx, pretregs);
                    return;
                }
                regm_t retregsx = mST0 | (*pretregs & mPSW);
                codelem(cdb,e.EV.E1, &retregsx, false);
                fixresult87(cdb, e, retregsx, pretregs);
                return;
            }

            case OPf_d:
            case OPd_f:
                if (tycomplex(e.EV.E1.Ety))
                    goto Lcomplex;
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }

                /* if won't do us much good to transfer back and        */
                /* forth between 8088 registers and 8087 registers      */
                if (OTcall(e.EV.E1.Eoper) && !(*pretregs & allregs))
                {
                    retregs = regmask(e.EV.E1.Ety, e.EV.E1.EV.E1.Ety);
                    if (retregs & (mXMM1 | mXMM0 |mST01 | mST0))       // if return in ST0
                    {
                        codelem(cdb,e.EV.E1,pretregs,false);
                        if (*pretregs & mST0)
                            note87(e, 0, 0);
                        return;
                    }
                    else
                        break;
                }
                goto Lload87;

            case OPs64_d:
                if (!I64)
                    goto Lload87;
                goto case OPs32_d;

            case OPs32_d:
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }
                goto Lload87;

            case OPs16_d:
            case OPu16_d:
            Lload87:
                load87(cdb,e,0,pretregs,null,-1);
                return;

            case OPu32_d:
                if (I64 && config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                else if (!I16)
                {
                    // Store the 32 bit value and a zero upper half to the
                    // float-register scratch area, then load it as a 64 bit
                    // integer.
                    regm_t retregsx = ALLREGS;
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    reg_t reg = findreg(retregsx);
                    cdb.genfltreg(STO, reg, 0);
                    regwithvalue(cdb,ALLREGS,0,&reg,0);
                    cdb.genfltreg(STO, reg, 4);

                    push87(cdb);
                    cdb.genfltreg(0xDF,5,0);     // FILD m64int

                    regm_t retregsy = mST0 /*| (*pretregs & mPSW)*/;
                    fixresult87(cdb, e, retregsy, pretregs);
                    return;
                }
                break;

            case OPd_s64:
                if (!I64)
                    goto Lcnvt87;
                goto case OPd_s32;

            case OPd_s32:
                if (config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                goto Lcnvt87;

            case OPd_s16:
            case OPd_u16:
            Lcnvt87:
                cnvt87(cdb,e,pretregs);
                return;

            case OPd_u32:               // use subroutine, not 8087
                if (I64 && config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                if (I32 || I64)
                {
                    cdd_u32(cdb,e,pretregs);
                    return;
                }
static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD ||
           TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
{
                retregs = mST0;
}
else
{
                retregs = DOUBLEREGS;
}
                goto L1;

            case OPd_u64:
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                retregs = DOUBLEREGS;
                goto L1;

            case OPu64_d:
                if (*pretregs & mST0)
                {
                    regm_t retregsx = I64 ? mAX : mAX|mDX;
                    codelem(cdb,e.EV.E1,&retregsx,false);
                    callclib(cdb,e,CLIB.u64_ldbl,pretregs,0);
                    return;
                }
                break;

            case OPld_u64:
            {
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                regm_t retregsx = mST0;
                codelem(cdb,e.EV.E1,&retregsx,false);
                callclib(cdb,e,CLIB.ld_u64,pretregs,0);
                return;
            }

            default:
                break;
        }
    }
    // Fallback: evaluate the operand, then call the library routine found
    // in the clib table.
    retregs = regmask(e.EV.E1.Ety, TYnfunc);
L1:
    codelem(cdb,e.EV.E1,&retregs,false);
    for (int i = 0; 1; i++)
    {
        assert(i < clib.length);        // operator must be present in the table
        if (clib[i][0] == e.Eoper)
        {
            callclib(cdb,e,clib[i][1],pretregs,0);
            break;
        }
    }
}


/***************************
 * Convert short to long.
 * For OPs16_32, OPu16_32, OPnp_fp, OPu32_64, OPs32_64,
 * OPu64_128, OPs64_128
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = conversion elem; the operand is e.EV.E1
 *      pretregs = requested result registers/flags; if no general register
 *                 (ALLREGS | mBP) is requested, only the operand is generated
 */

void cdshtlng(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    reg_t reg;
    regm_t retregs;

    //printf("cdshtlng(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    int e1comsub = e.EV.E1.Ecount;      // non-zero if the operand is a common subexpression
    ubyte op = e.Eoper;
    if ((*pretregs & (ALLREGS | mBP)) == 0)    // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);     // then conversion isn't necessary
        return;
    }
    else if (
             op == OPnp_fp ||
             (I16 && op == OPu16_32) ||
             (I32 && op == OPu32_64)
            )
    {
        /* Result goes into a register pair.
         * Zero extend by putting a zero into most significant reg.
         */

        regm_t retregsx = *pretregs & mLSW;
        assert(retregsx);
        tym_t tym1 = tybasic(e.EV.E1.Ety);
        codelem(cdb,e.EV.E1,&retregsx,false);

        regm_t regm = *pretregs & (mMSW & ALLREGS);
        if (regm == 0)                  // *pretregs could be mES
            regm = mMSW & ALLREGS;
        allocreg(cdb,&regm,&reg,TYint);
        if (e1comsub)
            getregs(cdb,retregsx);
        if (op == OPnp_fp)
        {
            int segreg;

            // BUG: what about pointers to functions?
            switch (tym1)
            {
                case TYimmutPtr:
                case TYnptr:    segreg = SEG_DS;        break;
                case TYcptr:    segreg = SEG_CS;        break;
                case TYsptr:    segreg = SEG_SS;        break;
                default:        assert(0);
            }
            cdb.gen2(0x8C,modregrm(3,segreg,reg));  // MOV reg,segreg
        }
        else
            movregconst(cdb,reg,0,0);  // 0 extend

        fixresult(cdb,e,retregsx | regm,pretregs);
        return;
    }
    else if (I64 && op == OPu32_64)
    {
        elem *e1 = e.EV.E1;
        retregs = *pretregs;
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            code cs;

            allocreg(cdb,&retregs,&reg,TYint);
            loadea(cdb,e1,&cs,LOD,reg,0,retregs,retregs);  //  MOV Ereg,EA
            freenode(e1);
        }
        else
        {
            *pretregs &= ~mPSW;                 // flags are set by eval of e1
            codelem(cdb,e1,&retregs,false);
            /* Determine if high 32 bits are already 0
             */
            if (e1.Eoper == OPu16_32 && !e1.Ecount)
            {
            }
            else
            {
                // Zero high 32 bits
                getregs(cdb,retregs);
                reg = findreg(retregs);
                // Don't use x89 because that will get optimized away
                genregs(cdb,LOD,reg,reg);  // MOV Ereg,Ereg
            }
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (I64 && op == OPs32_64 && OTrel(e.EV.E1.Eoper) && !e.EV.E1.Ecount)
    {
        /* Due to how e1 is calculated, the high 32 bits of the register
         * are already 0.
         */
        retregs = *pretregs;
        codelem(cdb,e.EV.E1,&retregs,false);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (!I16 && (op == OPs16_32 || op == OPu16_32) ||
              I64 && op == OPs32_64)
    {
        elem *e11;
        elem *e1 = e.EV.E1;

        if (e1.Eoper == OPu8_16 && !e1.Ecount &&
            ((e11 = e1.EV.E1).Eoper == OPvar || (e11.Eoper == OPind && !e11.Ecount))
           )
        {
            // Operand is itself a byte zero-extension of a memory operand:
            // clear the register, then load just the low byte.
            code cs;

            retregs = *pretregs & BYTEREGS;
            if (!retregs)
                retregs = BYTEREGS;
            allocreg(cdb,&retregs,&reg,TYint);
            movregconst(cdb,reg,0,0);                   //  XOR reg,reg
            loadea(cdb,e11,&cs,0x8A,reg,0,retregs,retregs);  //  MOV regL,EA
            freenode(e11);
            freenode(e1);
        }
        else if (e1.Eoper == OPvar ||
            (e1.Eoper == OPind && !e1.Ecount))
        {
            code cs = void;

            if (I32 && op == OPu16_32 && config.flags4 & CFG4speed)
                goto L2;
            retregs = *pretregs;
            allocreg(cdb,&retregs,&reg,TYint);
            const opcode = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,EA
            if (op == OPs32_64)
            {
                assert(I64);
                // MOVSXD reg,e1
                loadea(cdb,e1,&cs,0x63,reg,0,0,retregs);
                code_orrex(cdb.last(), REX_W);
            }
            else
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregs);
            freenode(e1);
        }
        else
        {
        L2:
            retregs = *pretregs;
            if (op == OPs32_64)
                retregs = mAX | (*pretregs & mPSW);
            *pretregs &= ~mPSW;             // flags are already set
            // Generate the operand into a side builder so its last
            // instruction can be inspected for the AND peephole below.
            CodeBuilder cdbx;
            cdbx.ctor();
            codelem(cdbx,e1,&retregs,false);
            code *cx = cdbx.finish();
            cdb.append(cdbx);
            getregs(cdb,retregs);
            if (op == OPu16_32 && cx)
            {
                cx = code_last(cx);
                if (cx.Iop == 0x81 && (cx.Irm & modregrm(3,7,0)) == modregrm(3,4,0) &&
                    mask(cx.Irm & 7) == retregs)
                {
                    // Convert AND of a word to AND of a dword, zeroing upper word
                    if (cx.Irex & REX_B)
                        retregs = mask(8 | (cx.Irm & 7));
                    cx.Iflags &= ~CFopsize;
                    cx.IEV2.Vint &= 0xFFFF;
                    goto L1;
                }
            }
            if (op == OPs16_32 && retregs == mAX)
                cdb.gen1(0x98);         // CWDE
            else if (op == OPs32_64 && retregs == mAX)
            {
                cdb.gen1(0x98);         // CDQE
                code_orrex(cdb.last(), REX_W);
            }
            else
            {
                reg = findreg(retregs);
                if (config.flags4 & CFG4speed && op == OPu16_32)
                {   // AND reg,0xFFFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFFFFu);
                }
                else
                {
                    opcode_t iop = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                }
            }
         L1:
            if (e1comsub)
                getregs(cdb,retregs);
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (*pretregs & mPSW || config.target_cpu < TARGET_80286)
    {
        // OPs16_32, OPs32_64
        // CWD doesn't affect flags, so we can depend on the integer
        // math to provide the flags.
        retregs = mAX | mPSW;               // want integer result in AX
        *pretregs &= ~mPSW;                 // flags are already set
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,mDX);           // sign extend into DX
        cdb.gen1(0x99);                         // CWD/CDQ
        if (e1comsub)
            getregs(cdb,retregs);
        fixresult(cdb,e,mDX | retregs,pretregs);
        return;
    }
    else
    {
        // OPs16_32, OPs32_64
        uint msreg,lsreg;

        retregs = *pretregs & mLSW;
        assert(retregs);
        codelem(cdb,e.EV.E1,&retregs,false);
        retregs |= *pretregs & mMSW;
        allocreg(cdb,&retregs,&reg,e.Ety);
        msreg = findregmsw(retregs);
        lsreg = findreglsw(retregs);
        genmovreg(cdb,msreg,lsreg);                // MOV msreg,lsreg
        assert(config.target_cpu >= TARGET_80286);              // 8088 can't handle SAR reg,imm8
        cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
}


/***************************
 * Convert byte to int.
 * For OPu8_16 and OPs8_16.
*/

// Params:
//      cdb = code builder that receives the generated instructions
//      e = the conversion elem (OPu8_16 or OPs8_16); e.EV.E1 is the byte operand
//      pretregs = mask of registers/flags the caller wants the result in;
//                 handed to fixresult() once the widened value is in a register
void cdbyteint(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    char size;                  // assigned and read only on the !I16 path

    if ((*pretregs & (ALLREGS | mBP)) == 0) // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);      // then conversion isn't necessary
        return;
    }

    //printf("cdbyteint(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));
    char op = e.Eoper;
    elem *e1 = e.EV.E1;
    if (e1.Eoper == OPcomma)
        docommas(cdb,&e1);
    if (!I16)
    {
        // Operand is addressable: load and extend it in one instruction.
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            code cs;

            regm_t retregsx = *pretregs;
            reg_t reg;
            allocreg(cdb,&retregsx,&reg,TYint);
            if (config.flags4 & CFG4speed &&
                op == OPu8_16 && mask(reg) & BYTEREGS &&
                config.target_cpu < TARGET_PentiumPro)
            {
                movregconst(cdb,reg,0,0);                        // XOR reg,reg
                loadea(cdb,e1,&cs,0x8A,reg,0,retregsx,retregsx); // MOV regL,EA
            }
            else
            {
                const opcode = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,EA
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregsx);
            }
            freenode(e1);
            fixresult(cdb,e,retregsx,pretregs);
            return;
        }
        size = tysize(e.Ety);
        retregs = *pretregs & BYTEREGS;
        if (retregs == 0)
            retregs = BYTEREGS;
        // Let the operand evaluation provide the flags; they are removed
        // from what is later requested of fixresult().
        retregs |= *pretregs & mPSW;
        *pretregs &= ~mPSW;
    }
    else
    {
        if (op == OPu8_16)              // if uint conversion
        {
            retregs = *pretregs & BYTEREGS;
            if (retregs == 0)
                retregs = BYTEREGS;
        }
        else
        {
            // CBW doesn't affect flags, so we can depend on the integer
            // math to provide the flags.
            retregs = mAX | (*pretregs & mPSW); // want integer result in AX
        }
    }

    // Evaluate the operand into a separate builder so we can tell whether
    // any code was generated for it (c1 is null when none was).
    CodeBuilder cdb1;
    cdb1.ctor();
    codelem(cdb1,e1,&retregs,false);
    code *c1 = cdb1.finish();
    cdb.append(cdb1);
    reg_t reg = findreg(retregs);
    code *c;
    if (!c1)
        goto L1;

    // If previous instruction is an AND bytereg,value
    c = cdb.last();
    if (c.Iop == 0x80 && c.Irm == modregrm(3,4,reg & 7) &&
        (op == OPu8_16 || (c.IEV2.Vuns & 0x80) == 0))
    {
        // Widen that AND in place instead of emitting a separate extension.
        if (*pretregs & mPSW)
            c.Iflags |= CFpsw;
        c.Iop |= 1;                     // convert to word operation
        c.IEV2.Vuns &= 0xFF;            // dump any high order bits
        *pretregs &= ~mPSW;             // flags already set
    }
    else
    {
     L1:
        if (!I16)
        {
            if (op == OPs8_16 && reg == AX && size == 2)
            {
                cdb.gen1(0x98);                 // CBW
                cdb.last().Iflags |= CFopsize;  // don't do a CWDE
            }
            else
            {
                // We could do better by not forcing the src and dst
                // registers to be the same.

                if (config.flags4 & CFG4speed && op == OPu8_16)
                {   // AND reg,0xFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFF);
                }
                else
                {
                    opcode_t iop = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                    if (I64 && reg >= 4)
                        code_orrex(cdb.last(), REX);
                }
            }
        }
        else
        {
            if (op == OPu8_16)
                genregs(cdb,0x30,reg+4,reg+4);  // XOR regH,regH
            else
            {
                cdb.gen1(0x98);                 // CBW
                *pretregs &= ~mPSW;             // flags already set
            }
        }
    }
    getregs(cdb,retregs);
    fixresult(cdb,e,retregs,pretregs);
}


/***************************
 * Convert long to short (OP32_16).
 * Get offset of far pointer (OPoffset).
 * Convert int to byte (OP16_8).
 * Convert long long to long (OP64_32).
* OP128_64
 */

void cdlngsht(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
    {
        // Sanity check: only the narrowing operators are expected here.
        switch (e.Eoper)
        {
            case OP32_16, OPoffset, OP16_8, OP64_32, OP128_64:
                break;

            default:
                assert(0);
        }
    }

    elem *src = e.EV.E1;
    regm_t retregs;
    if (e.Eoper == OP16_8)
    {
        // The narrowed value must end up in a byte-addressable register.
        retregs = *pretregs ? BYTEREGS : 0;
        codelem(cdb,src,&retregs,false);
    }
    else if (src.Eoper == OPrelconst)
    {
        offsetinreg(cdb,src,&retregs);
    }
    else
    {
        retregs = *pretregs ? ALLREGS : 0;
        codelem(cdb,src,&retregs,false);

        // When the operand occupies a register pair, keep only the
        // least significant half.
        const bool isOffset = e.Eoper == OPoffset;
        const bool lswOnly =
            I16 ||
            (I32 && (isOffset || e.Eoper == OP64_32)) ||
            (I64 && (isOffset || e.Eoper == OP128_64));
        if (lswOnly)
            retregs &= mLSW;
    }

    /* A register can hold the contents of only one elem at a time, so
     * handing the register over to e counts as "destroying" it even
     * though the bits in it do not change.
     */
    if (e.Ecount)
        getregs(cdb,retregs);
    else
        useregs(retregs);

    debug
    if (*pretregs && !retregs)
    {
        WROP(e.Eoper);
        printf(" *pretregs = %s, retregs = %s, e = %p\n",regm_str(*pretregs),regm_str(retregs),e);
    }

    assert(!*pretregs || retregs);
    fixresult(cdb,e,retregs,pretregs);  // lsw only
}

/**********************************************
 * OPmsw: extract the most significant half of a value held in a
 * register pair, i.e.
 *      the top 32 bits of a 64 bit value (I32),
 *      the top 16 bits of a 32 bit value (I16),
 *      the top 64 bits of a 128 bit value (I64).
 */

void cdmsw(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPmsw);

    regm_t retregs = 0;
    if (*pretregs)
        retregs = ALLREGS;
    codelem(cdb,e.EV.E1,&retregs,false);
    retregs &= mMSW;                    // keep the MSW register only

    /* A register can hold the contents of only one elem at a time, so
     * handing the register over to e counts as "destroying" it even
     * though the bits in it do not change.
     */
    if (e.Ecount)
        getregs(cdb,retregs);
    else
        useregs(retregs);

    debug
    if (*pretregs && !retregs)
    {
        WROP(e.Eoper);
        printf(" *pretregs = %s, retregs = %s\n",regm_str(*pretregs),regm_str(retregs));
        elem_print(e);
    }

    assert(!*pretregs || retregs);
    fixresult(cdb,e,retregs,pretregs);  // msw only
}



/******************************
 * Generate the IN / OUT instruction for operators OPinp and OPoutp.
 * The port number comes from e.EV.E1; for OPoutp the value to write
 * comes from e.EV.E2.
 */

void cdport(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdport\n");
    ubyte op = 0xE4;                    // root of all IN/OUT opcodes
    ubyte port = 0;                     // immediate port number; stays 0 for the DX form
    elem *e1 = e.EV.E1;

    // A small constant port can be encoded directly in the instruction
    if (e1.Eoper == OPconst && e1.EV.Vuns <= 255 &&
        (!evalinregister(e1) || regcon.mvar & mDX))
    {
        port = cast(ubyte)e1.EV.Vuns;
        freenode(e1);
    }
    else
    {
        regm_t portregs = mDX;          // port number is always DX
        codelem(cdb,e1,&portregs,false);
        op |= 0x08;                     // DX version of opcode
    }
    const bool portInDX = (op & 0x08) != 0;

    uint sz;
    if (e.Eoper == OPoutp)
    {
        sz = tysize(e.EV.E2.Ety);
        regm_t valregs = mAX;           // byte/word to output is in AL/AX
        scodelem(cdb,e.EV.E2,&valregs,(portInDX ? mDX : 0),true);
        op |= 0x02;                     // OUT opcode
    }
    else // OPinp
    {
        getregs(cdb,mAX);
        sz = tysize(e.Ety);
    }

    const bool wide = sz != 1;
    if (wide)
        op |= 1;                        // word operation
    cdb.genc2(op,0,port);               // IN/OUT AL/AX,DX/port
    if (wide && sz != REGSIZE)          // if need size override
        cdb.last().Iflags |= CFopsize;
    fixresult(cdb,e,mAX,pretregs);
}

/************************
 * Generate code for an asm elem.
*/

// Params:
//      cdb = code builder that receives the generated instructions
//      e = the asm elem; its EV.Vstring / EV.Vstrlen hold the bytes to emit
//      pretregs = registers the caller wants the result in
void cdasm(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Assume only regs normally destroyed by a function are destroyed
    getregs(cdb,(ALLREGS | mES) & ~fregsaved);
    cdb.genasm(cast(char *)e.EV.Vstring, cast(uint)e.EV.Vstrlen);
    fixresult(cdb,e,(I16 ? mDX | mAX : mAX),pretregs);
}

/************************
 * Generate code for OPnp_f16p and OPf16p_np.
 *
 * The operand is evaluated into *pretregs, its two 16 bit halves are
 * swapped with ROR ereg,16, the low 16 bits are adjusted, and the halves
 * are swapped back with ROL ereg,16. For OPnp_f16p a zero (null) operand
 * skips the whole adjustment.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the conversion elem; e.EV.E1 is the pointer operand
 *      pretregs = register the operand is evaluated into and converted in place
 */

void cdfar16(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    code *cnop = null;          // jump target for the null case, if any
    code cs;

    assert(I32);
    codelem(cdb,e.EV.E1,pretregs,false);
    reg_t reg = findreg(*pretregs);
    getregs(cdb,*pretregs);      // we will destroy the regs

    cs.Iop = 0xC1;
    cs.Irm = modregrm(3,1,reg);         // ROR
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 16;

    if (e.Eoper == OPnp_f16p)
    {
        /*      OR  ereg,ereg
                JE  L1
                ROR ereg,16
                SHL reg,3
                MOV rx,SS
                AND rx,3                ;mask off CPL bits
                OR  rl,4                ;run on LDT bit
                OR  regl,rl
                ROL ereg,16
            L1: NOP
         */
        reg_t rx;

        regm_t retregs = BYTEREGS & ~*pretregs;
        allocreg(cdb,&retregs,&rx,TYint);
        cnop = gennop(null);
        int jop = JCXZ;
        if (reg != CX)
        {
            gentstreg(cdb,reg);
            jop = JE;
        }
        genjmp(cdb,jop,FLcode, cast(block *)cnop);        // Jop L1
        cdb.gen(&cs);                                     // ROR ereg,16
        cs.IEV2.Vuns = 3;
        cs.Iflags |= CFopsize;
        NEWREG(cs.Irm,4);
        cdb.gen(&cs);                                     // SHL reg,3
        genregs(cdb,0x8C,2,rx);                           // MOV rx,SS
        int isbyte = (mask(reg) & BYTEREGS) == 0;
        cdb.genc2(0x80 | isbyte,modregrm(3,4,rx),3);      // AND rl,3
        cdb.genc2(0x80,modregrm(3,1,rx),4);               // OR  rl,4
        genregs(cdb,0x0A | isbyte,reg,rx);                // OR  regl,rl
    }
    else // OPf16p_np
    {
        /*      ROR ereg,16
                SHR reg,3
                ROL ereg,16
         */

        cdb.gen(&cs);                                     // ROR ereg,16
        cs.IEV2.Vuns = 3;
        cs.Iflags |= CFopsize;
        cs.Irm |= modregrm(0,5,0);
        cdb.gen(&cs);                                     // SHR reg,3
    }

    // Swap the halves back. This must follow the adjustment above, and the
    // null-case jump target must come after it so the jump skips everything.
    cs.Irm = modregrm(3,0,reg);
    cs.Iflags = 0;
    cs.IEV2.Vuns = 16;
    cdb.gen(&cs);                                         // ROL ereg,16
    cdb.append(cnop);                                     // L1: NOP (null for OPf16p_np)
}

/*************************
 * Generate code for OPbtst
 *
 * Tests one bit of e.EV.E1, selected by e.EV.E2, with a BT instruction
 * and materializes the carry flag as the result when a register result
 * is requested.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPbtst elem: E1 is the value tested, E2 the bit index
 *      pretregs = registers/flags wanted for the result; 0 means the
 *                 operands are evaluated only for their side effects
 */

void cdbtst(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;

    //printf("cdbtst(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));

    opcode_t op = 0xA3;                     // BT EA,value
    int mode = 4;

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false);  // eval left leaf
        *pretregs = 0;                    // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    regm_t idxregs;
    if ((e1.Eoper == OPind && !e1.Ecount) || e1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e1, RMload);    // get addressing mode
        idxregs = idxregm(&cs);             // mask if index regs used
    }
    else
    {
        // Evaluate E1 into a register and address that register directly
        retregs = tysize(e1.Ety) == 1 ? BYTEREGS : allregs;
        codelem(cdb,e1, &retregs, false);
        reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
        idxregs = retregs;
    }

    tym_t ty1 = tybasic(e1.Ety);
    const sz = tysize(e1.Ety);
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // Reduce the constant bit index modulo the operand width
        if (sz <= SHORTSIZE)
        {
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (sz == 4)
        {
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        retregs = ALLREGS & ~idxregs;

        /* A register variable may not have its upper 32
         * bits 0, so pick a different register to force
         * a MOV which will clear it
         */
        if (I64 && sz == 8 && tysize(e2.Ety) == 4)
        {
            regm_t rregm;
            if (isregvar(e2, &rregm, null))
                retregs &= ~rregm;
        }

        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (I64 && _tysize[ty1] == 8)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // allocreg() must not disturb the record of immediate values
            regm_t save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                movregconst(cdb,reg,1,8);                   // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}

/*************************
 * Generate code for OPbt, OPbtc, OPbtr, OPbts
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the bit-test elem: E1 supplies the addressed operand, E2 the bit index
 *      pretregs = registers/flags wanted for the result (the tested bit,
 *                 taken from the carry flag)
 */

void cdbt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdbt(%p, %s)\n", e, regm_str(*pretregs));
    regm_t retregs;
    reg_t reg;
    opcode_t op;
    int mode;

    // op is the second opcode byte of the register form,
    // mode the /digit of the 0F BA immediate form
    switch (e.Eoper)
    {
        case OPbt:      op = 0xA3; mode = 4; break;
        case OPbtc:     op = 0xBB; mode = 7; break;
        case OPbtr:     op = 0xB3; mode = 6; break;
        case OPbts:     op = 0xAB; mode = 5; break;

        default:
            assert(0);
    }

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    getlvalue(cdb, &cs, e, RMload);      // get addressing mode
    if (e.Eoper == OPbt && *pretregs == 0)
    {
        // A plain test whose result is unused: only evaluate the index
        codelem(cdb,e2,pretregs,false);
        return;
    }

    const ty1 = tybasic(e1.Ety);
    const ty2 = tybasic(e2.Ety);
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;
    regm_t idxregs = idxregm(&cs);         // mask if index regs used

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // Split the constant index: whole operand-sized units become a byte
        // offset added to the address, the remainder is the immediate bit number.
        if (_tysize[ty1] == SHORTSIZE)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~15) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (_tysize[ty1] == 4)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~31) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~63) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        retregs = ALLREGS & ~idxregs;
        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (_tysize[ty2] == 8 && I64)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        // tysize() reduces the type to its basic type before the size lookup,
        // matching the same test in cdbtst()
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // allocreg() must not disturb the record of immediate values
            const save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                movregconst(cdb,reg,1,8);                   // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}

/*************************************
 * Generate code for OPbsf and
OPbsr.
 */

// Params:
//      cdb = code builder that receives the generated instructions
//      e = the OPbsf / OPbsr elem; e.EV.E1 is the 2, 4 or 8 byte operand scanned
//      pretregs = registers wanted for the result; 0 means the operand is
//                 evaluated only for its side effects
void cdbscan(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdbscan()\n");
    //elem_print(e);
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || sz == 8);
    code cs = void;

    if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e.EV.E1, RMload);     // get addressing mode
    }
    else
    {
        // Evaluate the operand into a register and use it as the EA
        regm_t retregs = allregs;
        codelem(cdb,e.EV.E1, &retregs, false);
        const reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
    }

    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t reg;
    allocreg(cdb,&retregs, &reg, e.Ety);

    cs.Iop = (e.Eoper == OPbsf) ? 0x0FBC : 0x0FBD;        // BSF/BSR reg,EA
    code_newreg(&cs, reg);
    if (!I16 && sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);

    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * OPpopcnt operator
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPpopcnt elem; e.EV.E1 is the 2, 4 or (64 bit only) 8 byte operand
 *      pretregs = registers/flags wanted for the result; 0 means the operand
 *                 is evaluated only for its side effects
 */

void cdpopcnt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpopcnt()\n");
    //elem_print(e);
    assert(!I16);
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const tyml = tybasic(e.EV.E1.Ety);

    const sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || (sz == 8 && I64));     // no byte op

    code cs = void;
    if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e.EV.E1, RMload);     // get addressing mode
    }
    else
    {
        // Evaluate the operand into a register and use it as the EA
        regm_t retregs = allregs;
        codelem(cdb,e.EV.E1, &retregs, false);
        const reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
    }

    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t reg;
    allocreg(cdb,&retregs, &reg, e.Ety);

    cs.Iop = POPCNT;            // POPCNT reg,EA
    code_newreg(&cs, reg);
    if (sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    if (*pretregs & mPSW)
        cs.Iflags |= CFpsw;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);
    *pretregs &= mBP | ALLREGS;             // flags already set

    fixresult(cdb,e,retregs,pretregs);
}


/*******************************************
 * Generate code for OPpair, OPrpair.
 *
 * Evaluates E1 and E2 into the two halves of a register pair (or into two
 * XMM registers); OPrpair assigns the halves the other way round.
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPpair / OPrpair elem
 *      pretregs = registers wanted for the result; 0 means both operands
 *                 are evaluated only for their side effects
 */

void cdpair(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    if (*pretregs == 0)                         // if don't want result
    {
        codelem(cdb,e.EV.E1,pretregs,false);     // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e.EV.E2,pretregs,false);
        return;
    }

    //printf("\ncdpair(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //printf("Ecount = %d\n", e.Ecount);

    regm_t retregs = *pretregs;
    if (retregs == mPSW && tycomplex(e.Ety) && config.inline8087)
    {
        if (config.fpxmmregs)
            retregs |= mXMM0 | mXMM1;
        else
            retregs |= mST01;
    }

    if (retregs & mST01)
    {
        loadPair87(cdb, e, pretregs);
        return;
    }

    regm_t regs1;
    regm_t regs2;
    if (retregs & XMMREGS)
    {
        // Pick two distinct XMM registers from the candidates
        retregs &= XMMREGS;
        const reg = findreg(retregs);
        regs1 = mask(reg);
        regs2 = mask(findreg(retregs & ~regs1));
    }
    else
    {
        retregs &= allregs;
        if  (!retregs)
            retregs = allregs;
        regs1 = retregs & mLSW;
        regs2 = retregs & mMSW;
    }
    if (e.Eoper == OPrpair)
    {
        // swap
        regs1 ^= regs2;
        regs2 ^= regs1;
        regs1 ^= regs2;
    }
    //printf("1: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));

    codelem(cdb,e.EV.E1, &regs1, false);
    scodelem(cdb,e.EV.E2, &regs2, regs1, false);        // keep regs1 intact while evaluating E2
    //printf("2: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));

    if (e.EV.E1.Ecount)
        getregs(cdb,regs1);
    if (e.EV.E2.Ecount)
        getregs(cdb,regs2);

    fixresult(cdb,e,regs1 | regs2,pretregs);
}

/*************************
 * Generate code for OPcmpxchg
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPcmpxchg elem (see the tree shape below)
 *      pretregs = registers wanted for the byte-sized result, which is
 *                 produced from the zero flag with SETZ
 */

void cdcmpxchg(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* The form is:
     *     OPcmpxchg
     *    /     \
     * lvalue   OPparam
     *          /     \
     *        old     new
     */

    //printf("cdcmpxchg(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);
    assert(!e2.Ecount);

    const tyml = tybasic(e1.Ety);                   // type of lvalue
    const sz = _tysize[tyml];

    if (I32 && sz == 8)
    {
        regm_t retregsx = mDX|mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);          // [DX,AX] = e2.EV.E1

        regm_t retregs = mCX|mBX;
        scodelem(cdb,e2.EV.E2,&retregs,mDX|mAX,false);  // [CX,BX] = e2.EV.E2

        code cs = void;
        getlvalue(cdb,&cs,e1,mCX|mBX|mAX|mDX);        // get EA

        getregs(cdb,mDX|mAX);                 // CMPXCHG destroys these regs

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                           // LOCK prefix
        cs.Iop = 0x0FC7;                              // CMPXCHG8B EA
        cs.Iflags |= CFpsw;
        code_newreg(&cs,1);
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }
    else
    {
        const uint isbyte = (sz == 1);            // 1 for byte operation
        const ubyte word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
        const uint rex = (I64 && sz == 8) ? REX_W : 0;

        regm_t retregsx = mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);       // AX = e2.EV.E1

        regm_t retregs = (ALLREGS | mBP) & ~mAX;
        scodelem(cdb,e2.EV.E2,&retregs,mAX,false);   // load rvalue in reg

        code cs = void;
        getlvalue(cdb,&cs,e1,mAX | retregs); // get EA

        getregs(cdb,mAX);                  // CMPXCHG destroys AX

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                        // LOCK prefix
        cs.Iop = 0x0FB1 ^ isbyte;                    // CMPXCHG EA,reg
        cs.Iflags |= CFpsw | word;
        cs.Irex |= rex;
        const reg = findreg(retregs);
        code_newreg(&cs,reg);
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }

    if (regm_t retregs = *pretregs & (ALLREGS | mBP)) // if return result in register
    {
        assert(tysize(e.Ety) == 1);
        assert(I64 || retregs & BYTEREGS);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        uint ea = modregrmx(3,0,reg);
        if (I64 && reg >= 4)
            ea |= REX << 16;
        cdb.gen2(0x0F94,ea);        // SETZ reg
        *pretregs = retregs;
    }
}

/*************************
 * Generate code for OPprefetch
 *
 * Params:
 *      cdb = code builder that receives the generated instructions
 *      e = the OPprefetch elem: E1 is the address, E2 a constant 0..5
 *          selecting the instruction variant
 *      pretregs = must be 0; the operator produces no result
 */

void cdprefetch(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate the following based on e2:
     *    0: prefetch0
     *    1: prefetch1
     *    2: prefetch2
     *    3: prefetchnta
     *    4: prefetchw
     *    5: prefetchwt1
     */
    //printf("cdprefetch\n");
    elem *e1 = e.EV.E1;

    assert(*pretregs == 0);
    assert(e.EV.E2.Eoper == OPconst);
    opcode_t op;
    reg_t reg;                  // value for the reg field of the modregrm byte
    switch (e.EV.E2.EV.Vuns)
    {
        case 0: op = PREFETCH; reg = 1; break;  // PREFETCH0
        case 1: op = PREFETCH; reg = 2; break;  // PREFETCH1
        case 2: op = PREFETCH; reg = 3; break;  // PREFETCH2
        case 3: op = PREFETCH; reg = 0; break;  // PREFETCHNTA
        case 4: op = 0x0F0D;   reg = 1; break;  // PREFETCHW
        case 5: op = 0x0F0D;   reg = 2; break;  // PREFETCHWT1
        default: assert(0);
    }

    freenode(e.EV.E2);

    code cs = void;
    getlvalue(cdb,&cs,e1,0);
    cs.Iop = op;
    cs.Irm |= modregrm(0,reg,0);
    cs.Iflags |= CFvolatile;        // do not schedule
    cdb.gen(&cs);
}


/*********************
 * Load register from EA of assignment operation.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that will be evaluated (not referenced by this helper)
 *      reg = set to register loaded from EA
 *      retregs = register candidates for reg
 */
private
void opAssLoadReg(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t reg, regm_t retregs)
{
    modEA(cdb, &cs);
    allocreg(cdb,&retregs,&reg,TYoffset);

    cs.Iop = LOD;
    code_newreg(&cs,reg);
    cdb.gen(&cs);                        // MOV reg,EA
}

/*********************
 * Load register pair from EA of assignment operation.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that will be evaluated
 *      rhi = set to most significant register of the pair
 *      rlo = set to least significant register of the pair
 *      retregs = register candidates for rhi, rlo
 *      keepmsk = registers to not modify
 */
private
void opAssLoadPair(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t rhi, out reg_t rlo, regm_t retregs, regm_t keepmsk)
{
    getlvalue(cdb,&cs,e.EV.E1,retregs | keepmsk);
    const tym_t tyml = tybasic(e.EV.E1.Ety);              // type of lvalue
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tyml);

    rhi = findregmsw(retregs);
    rlo = findreglsw(retregs);

    cs.Iop = LOD;
    code_newreg(&cs,rlo);
    cdb.gen(&cs);                   // MOV rlo,EA
    getlvalue_msw(&cs);
    code_newreg(&cs,rhi);
    cdb.gen(&cs);                   // MOV rhi,EA+2
    getlvalue_lsw(&cs);             // leave cs addressing the least significant half again
}


/*********************************************************
 * Store register result of assignment operation EA.
* Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that was evaluated
 *      reg = register of result
 *      pretregs = registers to store result in
 */
private
void opAssStoreReg(ref CodeBuilder cdb, ref code cs, elem* e, reg_t reg, regm_t* pretregs)
{
    elem* lvalue = e.EV.E1;
    const regm_t resmask = mask(reg);

    // The byte form of the store opcode differs from the word form in bit 0
    const ubyte byteOp = (_tysize[tybasic(lvalue.Ety)] == 1);
    cs.Iop = STO ^ byteOp;
    code_newreg(&cs,reg);
    cdb.gen(&cs);                               // MOV EA,resreg

    if (lvalue.Ecount)                          // the lvalue is a CSE: record where its value lives
        cssave(lvalue,resmask,!OTleaf(lvalue.Eoper));
    freenode(lvalue);
    fixresult(cdb,e,resmask,pretregs);
}

/*********************************************************
 * Write a register pair back to the EA of an assignment operation,
 * least significant half first.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that was evaluated
 *      rhi = most significant register of the pair
 *      rlo = least significant register of the pair
 *      pretregs = registers to store result in
 */
private
void opAssStorePair(ref CodeBuilder cdb, ref code cs, elem* e, reg_t rhi, reg_t rlo, regm_t* pretregs)
{
    elem* lvalue = e.EV.E1;
    const regm_t pairmask = mask(rhi) | mask(rlo);

    cs.Iop = STO;
    code_newreg(&cs,rlo);
    cdb.gen(&cs);                               // MOV EA,lsreg

    code_newreg(&cs,rhi);
    getlvalue_msw(&cs);
    cdb.gen(&cs);                               // MOV EA+REGSIZE,msreg

    if (lvalue.Ecount)                          // the lvalue is a CSE: record where its value lives
        cssave(lvalue,pairmask,!OTleaf(lvalue.Eoper));
    freenode(lvalue);
    fixresult(cdb,e,pairmask,pretregs);
}


}