/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1985-1998 by Symantec
 *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod4.d, backend/cod4.d)
 */

module dmd.backend.cod4;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.global;
import dmd.backend.oper;
import dmd.backend.ty;
import dmd.backend.evalu8 : el_toldoubled;
import dmd.backend.xmm;

extern (C++):

nothrow:

int REGSIZE();

extern __gshared CGstate cgstate;
extern __gshared bool[FLMAX] datafl;

private extern (D) uint mask(uint m) { return 1 << m; }

                        /*   AX,CX,DX,BX                */
__gshared const reg_t[4] dblreg = [ BX,DX,NOREG,CX ];


/*******************************
 * Count how many times symbol s is referenced inside expression tree e.
 * A leaf contributes 1 when it is an OPvar whose symbol is s, else 0;
 * interior nodes sum their first operand and, for binary operators,
 * their second operand as well.
 * Returns:
 *      number of references to s found in e
 */

private int intree(Symbol *s,elem *e)
{
    // Leaf: it either is a reference to s or it is not
    if (OTleaf(e.Eoper))
        return e.Eoper == OPvar && e.EV.Vsym == s;

    int count = intree(s,e.EV.E1);
    if (OTbinary(e.Eoper))
        count += intree(s,e.EV.E2);
    return count;
}

/***********************************
 * Decide whether expression e may be computed directly in the register
 * variable s (the destination of an assignment).
 * Expressions such as x=x+x+x or x=a+x must be rejected, so the walk
 * only follows the left operand of a small set of binary operators
 * while s is referenced exactly once and not in the right operand.
 * Returns:
 *      1       e can be evaluated straight into s
 *      0       it cannot
 */

int doinreg(Symbol *s, elem *e)
{
    for (;;)
    {
        OPER op = e.Eoper;

        // These never need the reference count
        if (op == OPind || OTcall(op) || OTleaf(op))
            return 1;

        int uses = intree(s,e);
        if (uses == 0 ||
            (OTunary(op) && OTleaf(e.EV.E1.Eoper)))
            return 1;

        if (uses != 1)
            return 0;

        switch (op)
        {
            case OPadd:
            case OPmin:
            case OPand:
            case OPor:
            case OPxor:
            case OPshl:
            case OPmul:
                // The single reference must live in the left operand
                if (intree(s,e.EV.E2))
                    return 0;
                e = e.EV.E1;            // keep examining the left subtree
                break;

            default:
                return 0;
        }
    }
}

/****************************
 * Called just before an EA is modified.
 * When the addressing mode in c names a register (mod field == 3),
 * call getregs() on that register so that anything cached in it
 * (common subexpressions) is dealt with first; a memory EA needs nothing.
 */

void modEA(ref CodeBuilder cdb,code *c)
{
    if ((c.Irm & 0xC0) != 0xC0)         // EA is not a register
        return;

    reg_t reg = c.Irm & 7;
    if (c.Irex & REX_B)                 // extended register
    {
        reg |= 8;
        assert(I64);
    }
    getregs(cdb,mask(reg));
}

static if (TARGET_WINDOS)
{
// This code is for CPUs that do not support the 8087

/****************************
 * Gen code for op= for doubles.
 * When config.inline8087 is set the work is handed to opass87();
 * otherwise the lvalue is loaded (or pushed), the rvalue is evaluated,
 * a runtime library routine picked from clibtab is called, and the
 * result is stored back through the lvalue's EA.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the op= expression; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = requested result registers, forwarded to fixresult()
 *      op = operator, used as (op - OPpostinc) to index clibtab
 */

private void opassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs,OPER op)
{
    // Library routine per operator; the OPeq slot is a placeholder (-1)
    static immutable uint[OPdivass - OPpostinc + 1] clibtab =
    /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass       */
    [  CLIB.dadd, CLIB.dsub, cast(uint)-1,  CLIB.dadd,CLIB.dsub,CLIB.dmul,CLIB.ddiv ];

    if (config.inline8087)
    {
        opass87(cdb,e,pretregs);
        return;
    }

    code cs;
    regm_t retregs2,retregs,idxregs;

    uint clib = clibtab[op - OPpostinc];
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    getlvalue(cdb,&cs,e1,DOUBLEREGS | mBX | mCX);

    if (tym == TYfloat)
    {
        clib += CLIB.fadd - CLIB.dadd;    /* convert to float operation   */

        // Load EA into FLOATREGS
        getregs(cdb,FLOATREGS);
        cs.Iop = 0x8B;
        cs.Irm |= modregrm(0,AX,0);
        cdb.gen(&cs);

        if (!I32)
        {
            // second load: DX from the most significant word
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

        }
        retregs2 = FLOATREGS2;
        idxregs = FLOATREGS | idxregm(&cs);
        retregs = FLOATREGS;
    }
    else
    {
        if (I32)
        {
            // Load EA into DOUBLEREGS
            getregs(cdb,DOUBLEREGS_32);
            cs.Iop = 0x8B;
            cs.Irm |= modregrm(0,AX,0);
            cdb.gen(&cs);
            cs.Irm |= modregrm(0,DX,0);
            getlvalue_msw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);

            retregs2 = DOUBLEREGS2_32;
            idxregs = DOUBLEREGS_32 | idxregm(&cs);
        }
        else
        {
            // Push EA onto stack
            // (four pushes, starting at the highest word and stepping down)
            cs.Iop = 0xFF;
            cs.Irm |= modregrm(0,6,0);
            cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            getlvalue_lsw(&cs);
            cdb.gen(&cs);
            stackpush += DOUBLESIZE;

            retregs2 = DOUBLEREGS_16;
            idxregs = idxregm(&cs);
        }
        retregs = DOUBLEREGS;
    }

    // Keep ES as well when the lvalue is addressed through it
    if ((cs.Iflags & CFSEG) == CFes)
        idxregs |= mES;
    cgstate.stackclean++;
    scodelem(cdb,e.EV.E2,&retregs2,idxregs,false);
    cgstate.stackclean--;
    callclib(cdb,e,clib,&retregs,0);
    if (e1.Ecount)
        cssave(e1,retregs,!OTleaf(e1.Eoper));             // if lvalue is a CSE
    freenode(e1);
    cs.Iop = 0x89;                                  // MOV EA,DOUBLEREGS
    fltregs(cdb,&cs,tym);
    fixresult(cdb,e,retregs,pretregs);
}

/****************************
 * Gen code for OPnegass for doubles.
 * When config.inline8087 is set the work is handed to cdnegass87();
 * otherwise the value is negated in place by XORing 0x80 into the byte
 * at offset sz-1 of the operand, and the result is reloaded into
 * registers only if the caller wants it or the lvalue is a CSE.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the negass expression; e.EV.E1 is the lvalue
 *      pretregs = requested result registers (0 means no result wanted)
 */

private void opnegassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    if (config.inline8087)
    {
        cdnegass87(cdb,e,pretregs);
        return;
    }
    elem *e1 = e.EV.E1;
    tym_t tym = tybasic(e1.Ety);
    int sz = _tysize[tym];
    code cs;

    getlvalue(cdb,&cs,e1,*pretregs ? DOUBLEREGS | mBX | mCX : 0);
    modEA(cdb,&cs);
    cs.Irm |= modregrm(0,6,0);
    cs.Iop = 0x80;
    cs.IEV1.Voffset += sz - 1;          // address the last byte of the operand
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 0x80;
    cdb.gen(&cs);                       // XOR 7[EA],0x80
    if (tycomplex(tym))
    {
        // second XOR, sz/2 bytes lower
        cs.IEV1.Voffset -= sz / 2;
        cdb.gen(&cs);                   // XOR 7[EA],0x80
    }

    regm_t retregs;
    if (*pretregs || e1.Ecount)
    {
        // NOTE(review): when the tycomplex branch above ran, Voffset was
        // also lowered by sz/2, so this subtraction leaves it at -sz/2
        // rather than at the original offset - confirm whether complex
        // operands can reach this reload path.
        cs.IEV1.Voffset -= sz - 1;

        if (tym == TYfloat)
        {
            // Load EA into FLOATREGS
            getregs(cdb,FLOATREGS);
            cs.Iop = 0x8B;
            NEWREG(cs.Irm, AX);
            cdb.gen(&cs);

            if (!I32)
            {
                NEWREG(cs.Irm, DX);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);

            }
            retregs = FLOATREGS;
        }
        else
        {
            if (I32)
            {
                // Load EA into DOUBLEREGS
                getregs(cdb,DOUBLEREGS_32);
                cs.Iop = 0x8B;
                cs.Irm &= ~cast(uint)modregrm(0,7,0);   // clear the reg field left over from the XOR
                cs.Irm |= modregrm(0,AX,0);
                cdb.gen(&cs);
                cs.Irm |= modregrm(0,DX,0);
                getlvalue_msw(&cs);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
            }
            else
            {
static if (1)
{
                cs.Iop = 0x8B;
                fltregs(cdb,&cs,TYdouble);     // MOV DOUBLEREGS, EA
}
else
{
                // Disabled alternative (never compiled: static if (1) above)
                // Push EA onto stack
                cs.Iop = 0xFF;
                cs.Irm |= modregrm(0,6,0);
                cs.IEV1.Voffset += DOUBLESIZE - REGSIZE;
                cdb.gen(&cs);
                cs.IEV1.Voffset -= REGSIZE;
                cdb.gen(&cs);
cs.IEV1.Voffset -= REGSIZE; 319 cdb.gen(&cs); 320 cs.IEV1.Voffset -= REGSIZE; 321 cdb.gen(&cs); 322 stackpush += DOUBLESIZE; 323 } 324 } 325 retregs = DOUBLEREGS; 326 } 327 if (e1.Ecount) 328 cssave(e1,retregs,!OTleaf(e1.Eoper)); /* if lvalue is a CSE */ 329 } 330 else 331 { 332 retregs = 0; 333 assert(e1.Ecount == 0); 334 } 335 336 freenode(e1); 337 fixresult(cdb,e,retregs,pretregs); 338 } 339 } 340 341 342 343 /************************ 344 * Generate code for an assignment. 345 */ 346 347 void cdeq(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 348 { 349 tym_t tymll; 350 reg_t reg; 351 code cs; 352 elem *e11; 353 bool regvar; // true means evaluate into register variable 354 regm_t varregm; 355 reg_t varreg; 356 targ_int postinc; 357 358 //printf("cdeq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 359 elem *e1 = e.EV.E1; 360 elem *e2 = e.EV.E2; 361 int e2oper = e2.Eoper; 362 tym_t tyml = tybasic(e1.Ety); // type of lvalue 363 regm_t retregs = *pretregs; 364 365 if (tyxmmreg(tyml) && config.fpxmmregs) 366 { 367 xmmeq(cdb, e, CMP, e1, e2, pretregs); 368 return; 369 } 370 371 if (tyfloating(tyml) && config.inline8087) 372 { 373 if (tycomplex(tyml)) 374 { 375 complex_eq87(cdb, e, pretregs); 376 return; 377 } 378 379 if (!(retregs == 0 && 380 (e2oper == OPconst || e2oper == OPvar || e2oper == OPind)) 381 ) 382 { 383 eq87(cdb,e,pretregs); 384 return; 385 } 386 if (config.target_cpu >= TARGET_PentiumPro && 387 (e2oper == OPvar || e2oper == OPind) 388 ) 389 { 390 eq87(cdb,e,pretregs); 391 return; 392 } 393 if (tyml == TYldouble || tyml == TYildouble) 394 { 395 eq87(cdb,e,pretregs); 396 return; 397 } 398 } 399 400 uint sz = _tysize[tyml]; // # of bytes to transfer 401 assert(cast(int)sz > 0); 402 403 if (retregs == 0) // if no return value 404 { 405 int fl; 406 407 /* If registers are tight, and we might need them for the lvalue, 408 * prefer to not use them for the rvalue 409 */ 410 bool plenty = true; 411 if (e1.Eoper == OPind) 412 { 413 /* Will need 1 register 
for evaluation, +2 registers for 414 * e1's addressing mode 415 */ 416 regm_t m = allregs & ~regcon.mvar; // mask of non-register variables 417 m &= m - 1; // clear least significant bit 418 m &= m - 1; // clear least significant bit 419 plenty = m != 0; // at least 3 registers 420 } 421 422 if ((e2oper == OPconst || // if rvalue is a constant 423 e2oper == OPrelconst && 424 !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) && 425 ((fl = el_fl(e2)) == FLdata || 426 fl==FLudata || fl == FLextern) 427 && !(e2.EV.Vsym.ty() & mTYcs) 428 ) && 429 !(evalinregister(e2) && plenty) && 430 !e1.Ecount) // and no CSE headaches 431 { 432 // Look for special case of (*p++ = ...), where p is a register variable 433 if (e1.Eoper == OPind && 434 ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) && 435 e11.EV.E1.Eoper == OPvar && 436 e11.EV.E1.EV.Vsym.Sfl == FLreg && 437 (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS) 438 ) 439 { 440 Symbol *s = e11.EV.E1.EV.Vsym; 441 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 442 { 443 regcon.params &= ~s.Spregm(); 444 } 445 postinc = e11.EV.E2.EV.Vint; 446 if (e11.Eoper == OPpostdec) 447 postinc = -postinc; 448 getlvalue(cdb,&cs,e1,RMstore); 449 freenode(e11.EV.E2); 450 } 451 else 452 { 453 postinc = 0; 454 getlvalue(cdb,&cs,e1,RMstore); 455 456 if (e2oper == OPconst && 457 config.flags4 & CFG4speed && 458 (config.target_cpu == TARGET_Pentium || 459 config.target_cpu == TARGET_PentiumMMX) && 460 (cs.Irm & 0xC0) == 0x80 461 ) 462 { 463 if (I64 && sz == 8 && e2.EV.Vpointer) 464 { 465 // MOV reg,imm64 466 // MOV EA,reg 467 regm_t rregm = allregs & ~idxregm(&cs); 468 reg_t regx; 469 regwithvalue(cdb,rregm,e2.EV.Vpointer,®x,64); 470 cs.Iop = 0x89; 471 cs.Irm |= modregrm(0,regx & 7,0); 472 if (regx & 8) 473 cs.Irex |= REX_R; 474 cdb.gen(&cs); 475 freenode(e2); 476 goto Lp; 477 } 478 if ((sz == REGSIZE || (I64 && sz == 4)) && e2.EV.Vint) 479 { 480 // MOV reg,imm 481 // MOV EA,reg 482 regm_t rregm = allregs & 
~idxregm(&cs); 483 reg_t regx; 484 regwithvalue(cdb,rregm,e2.EV.Vint,®x,0); 485 cs.Iop = 0x89; 486 cs.Irm |= modregrm(0,regx & 7,0); 487 if (regx & 8) 488 cs.Irex |= REX_R; 489 cdb.gen(&cs); 490 freenode(e2); 491 goto Lp; 492 } 493 if (sz == 2 * REGSIZE && e2.EV.Vllong == 0) 494 { 495 // MOV reg,imm 496 // MOV EA,reg 497 // MOV EA+2,reg 498 regm_t rregm = getscratch() & ~idxregm(&cs); 499 if (rregm) 500 { 501 reg_t regx; 502 regwithvalue(cdb,rregm,e2.EV.Vint,®x,0); 503 cs.Iop = 0x89; 504 cs.Irm |= modregrm(0,regx,0); 505 cdb.gen(&cs); 506 getlvalue_msw(&cs); 507 cdb.gen(&cs); 508 freenode(e2); 509 goto Lp; 510 } 511 } 512 } 513 } 514 515 // If loading result into a register 516 if ((cs.Irm & 0xC0) == 0xC0) 517 { 518 modEA(cdb,&cs); 519 if (sz == 2 * REGSIZE && cs.IFL1 == FLreg) 520 getregs(cdb,cs.IEV1.Vsym.Sregm); 521 } 522 cs.Iop = (sz == 1) ? 0xC6 : 0xC7; 523 524 if (e2oper == OPrelconst) 525 { 526 cs.IEV2.Voffset = e2.EV.Voffset; 527 cs.IFL2 = cast(ubyte)fl; 528 cs.IEV2.Vsym = e2.EV.Vsym; 529 cs.Iflags |= CFoff; 530 cdb.gen(&cs); // MOV EA,&variable 531 if (I64 && sz == 8) 532 code_orrex(cdb.last(), REX_W); 533 if (sz > REGSIZE) 534 { 535 cs.Iop = 0x8C; 536 getlvalue_msw(&cs); 537 cs.Irm |= modregrm(0,3,0); 538 cdb.gen(&cs); // MOV EA+2,DS 539 } 540 } 541 else 542 { 543 assert(e2oper == OPconst); 544 cs.IFL2 = FLconst; 545 targ_size_t *p = cast(targ_size_t *) &(e2.EV); 546 cs.IEV2.Vsize_t = *p; 547 // Look for loading a register variable 548 if ((cs.Irm & 0xC0) == 0xC0) 549 { 550 reg_t regx = cs.Irm & 7; 551 552 if (cs.Irex & REX_B) 553 regx |= 8; 554 if (I64 && sz == 8) 555 movregconst(cdb,regx,*p,64); 556 else 557 movregconst(cdb,regx,*p,1 ^ (cs.Iop & 1)); 558 if (sz == 2 * REGSIZE) 559 { getlvalue_msw(&cs); 560 if (REGSIZE == 2) 561 movregconst(cdb,cs.Irm & 7,(cast(ushort *)p)[1],0); 562 else if (REGSIZE == 4) 563 movregconst(cdb,cs.Irm & 7,(cast(uint *)p)[1],0); 564 else if (REGSIZE == 8) 565 movregconst(cdb,cs.Irm & 7,p[1],0); 566 else 567 assert(0); 568 } 
569 } 570 else if (I64 && sz == 8 && *p >= 0x80000000) 571 { // Use 64 bit MOV, as the 32 bit one gets sign extended 572 // MOV reg,imm64 573 // MOV EA,reg 574 regm_t rregm = allregs & ~idxregm(&cs); 575 reg_t regx; 576 regwithvalue(cdb,rregm,*p,®x,64); 577 cs.Iop = 0x89; 578 cs.Irm |= modregrm(0,regx & 7,0); 579 if (regx & 8) 580 cs.Irex |= REX_R; 581 cdb.gen(&cs); 582 } 583 else 584 { 585 int off = sz; 586 do 587 { int regsize = REGSIZE; 588 if (off >= 4 && I16 && config.target_cpu >= TARGET_80386) 589 { 590 regsize = 4; 591 cs.Iflags |= CFopsize; // use opsize to do 32 bit operation 592 } 593 else if (I64 && sz == 16 && *p >= 0x80000000) 594 { 595 regm_t rregm = allregs & ~idxregm(&cs); 596 reg_t regx; 597 regwithvalue(cdb,rregm,*p,®x,64); 598 cs.Iop = 0x89; 599 cs.Irm |= modregrm(0,regx & 7,0); 600 if (regx & 8) 601 cs.Irex |= REX_R; 602 } 603 else 604 { 605 regm_t retregsx = (sz == 1) ? BYTEREGS : allregs; 606 reg_t regx; 607 if (reghasvalue(retregsx,*p,®x)) 608 { 609 cs.Iop = (cs.Iop & 1) | 0x88; 610 cs.Irm |= modregrm(0,regx & 7,0); // MOV EA,regx 611 if (regx & 8) 612 cs.Irex |= REX_R; 613 if (I64 && sz == 1 && regx >= 4) 614 cs.Irex |= REX; 615 } 616 if (!I16 && off == 2) // if 16 bit operand 617 cs.Iflags |= CFopsize; 618 if (I64 && sz == 8) 619 cs.Irex |= REX_W; 620 } 621 cdb.gen(&cs); // MOV EA,const 622 623 p = cast(targ_size_t *)(cast(char *) p + regsize); 624 cs.Iop = (cs.Iop & 1) | 0xC6; 625 cs.Irm &= cast(ubyte)~cast(int)modregrm(0,7,0); 626 cs.Irex &= ~REX_R; 627 cs.IEV1.Voffset += regsize; 628 cs.IEV2.Vint = cast(int)*p; 629 off -= regsize; 630 } while (off > 0); 631 } 632 } 633 freenode(e2); 634 goto Lp; 635 } 636 retregs = allregs; // pick a reg, any reg 637 if (sz == 2 * REGSIZE) 638 retregs &= ~mBP; // BP cannot be used for register pair 639 } 640 if (retregs == mPSW) 641 { 642 retregs = allregs; 643 if (sz == 2 * REGSIZE) 644 retregs &= ~mBP; // BP cannot be used for register pair 645 } 646 cs.Iop = 0x89; 647 if (sz == 1) // must have byte 
regs 648 { 649 cs.Iop = 0x88; 650 retregs &= BYTEREGS; 651 if (!retregs) 652 retregs = BYTEREGS; 653 } 654 else if (retregs & mES && 655 ( 656 (e1.Eoper == OPind && 657 ((tymll = tybasic(e1.EV.E1.Ety)) == TYfptr || tymll == TYhptr)) || 658 (e1.Eoper == OPvar && e1.EV.Vsym.Sfl == FLfardata) 659 ) 660 ) 661 // getlvalue() needs ES, so we can't return it 662 retregs = allregs; // no conflicts with ES 663 else if (tyml == TYdouble || tyml == TYdouble_alias || retregs & mST0) 664 retregs = DOUBLEREGS; 665 666 regvar = false; 667 varregm = 0; 668 if (config.flags4 & CFG4optimized) 669 { 670 // Be careful of cases like (x = x+x+x). We cannot evaluate in 671 // x if x is in a register. 672 if (isregvar(e1,&varregm,&varreg) && // if lvalue is register variable 673 doinreg(e1.EV.Vsym,e2) && // and we can compute directly into it 674 !(sz == 1 && e1.EV.Voffset == 1) 675 ) 676 { 677 regvar = true; 678 retregs = varregm; 679 reg = varreg; // evaluate directly in target register 680 if (tysize(e1.Ety) == REGSIZE && 681 tysize(e1.EV.Vsym.Stype.Tty) == 2 * REGSIZE) 682 { 683 if (e1.EV.Voffset) 684 retregs &= mMSW; 685 else 686 retregs &= mLSW; 687 reg = findreg(retregs); 688 } 689 } 690 } 691 if (*pretregs & mPSW && OTleaf(e1.Eoper)) // if evaluating e1 couldn't change flags 692 { // Be careful that this lines up with jmpopcode() 693 retregs |= mPSW; 694 *pretregs &= ~mPSW; 695 } 696 scodelem(cdb,e2,&retregs,0,true); // get rvalue 697 698 // Look for special case of (*p++ = ...), where p is a register variable 699 if (e1.Eoper == OPind && 700 ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) && 701 e11.EV.E1.Eoper == OPvar && 702 e11.EV.E1.EV.Vsym.Sfl == FLreg && 703 (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS) 704 ) 705 { 706 Symbol *s = e11.EV.E1.EV.Vsym; 707 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 708 { 709 regcon.params &= ~s.Spregm(); 710 } 711 712 postinc = e11.EV.E2.EV.Vint; 713 if (e11.Eoper == OPpostdec) 714 postinc = -postinc; 715 
getlvalue(cdb,&cs,e1,RMstore | retregs); 716 freenode(e11.EV.E2); 717 } 718 else 719 { 720 postinc = 0; 721 getlvalue(cdb,&cs,e1,RMstore | retregs); // get lvalue (cl == null if regvar) 722 } 723 724 getregs(cdb,varregm); 725 726 assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes)); 727 if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES) 728 { 729 reg = findreglsw(retregs); 730 cs.Irm |= modregrm(0,reg,0); 731 cdb.gen(&cs); // MOV EA,reg 732 getlvalue_msw(&cs); // point to where segment goes 733 cs.Iop = 0x8C; 734 NEWREG(cs.Irm,0); 735 cdb.gen(&cs); // MOV EA+2,ES 736 } 737 else 738 { 739 if (!I16) 740 { 741 reg = findreg(retregs & 742 ((sz > REGSIZE) ? mBP | mLSW : mBP | ALLREGS)); 743 cs.Irm |= modregrm(0,reg & 7,0); 744 if (reg & 8) 745 cs.Irex |= REX_R; 746 for (; true; sz -= REGSIZE) 747 { 748 // Do not generate mov from register onto itself 749 if (regvar && reg == ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0))) 750 break; 751 if (sz == 2) // if 16 bit operand 752 cs.Iflags |= CFopsize; 753 else if (sz == 1 && reg >= 4) 754 cs.Irex |= REX; 755 cdb.gen(&cs); // MOV EA+offset,reg 756 if (sz <= REGSIZE) 757 break; 758 getlvalue_msw(&cs); 759 reg = findregmsw(retregs); 760 code_newreg(&cs, reg); 761 } 762 } 763 else 764 { 765 if (sz > REGSIZE) 766 cs.IEV1.Voffset += sz - REGSIZE; // 0,2,6 767 reg = findreg(retregs & 768 (sz > REGSIZE ? 
mMSW : ALLREGS)); 769 if (tyml == TYdouble || tyml == TYdouble_alias) 770 reg = AX; 771 cs.Irm |= modregrm(0,reg,0); 772 // Do not generate mov from register onto itself 773 if (!regvar || reg != (cs.Irm & 7)) 774 for (; true; sz -= REGSIZE) // 1,2,4 775 { 776 cdb.gen(&cs); // MOV EA+offset,reg 777 if (sz <= REGSIZE) 778 break; 779 cs.IEV1.Voffset -= REGSIZE; 780 if (tyml == TYdouble || tyml == TYdouble_alias) 781 reg = dblreg[reg]; 782 else 783 reg = findreglsw(retregs); 784 NEWREG(cs.Irm,reg); 785 } 786 } 787 } 788 if (e1.Ecount || // if lvalue is a CSE or 789 regvar) // rvalue can't be a CSE 790 { 791 getregs_imm(cdb,retregs); // necessary if both lvalue and 792 // rvalue are CSEs (since a reg 793 // can hold only one e at a time) 794 cssave(e1,retregs,!OTleaf(e1.Eoper)); // if lvalue is a CSE 795 } 796 797 fixresult(cdb,e,retregs,pretregs); 798 Lp: 799 if (postinc) 800 { 801 reg_t ireg = findreg(idxregm(&cs)); 802 if (*pretregs & mPSW) 803 { // Use LEA to avoid touching the flags 804 uint rm = cs.Irm & 7; 805 if (cs.Irex & REX_B) 806 rm |= 8; 807 cdb.genc1(LEA,buildModregrm(2,ireg,rm),FLconst,postinc); 808 if (tysize(e11.EV.E1.Ety) == 8) 809 code_orrex(cdb.last(), REX_W); 810 } 811 else if (I64) 812 { 813 cdb.genc2(0x81,modregrmx(3,0,ireg),postinc); 814 if (tysize(e11.EV.E1.Ety) == 8) 815 code_orrex(cdb.last(), REX_W); 816 } 817 else 818 { 819 if (postinc == 1) 820 cdb.gen1(0x40 + ireg); // INC ireg 821 else if (postinc == -cast(targ_int)1) 822 cdb.gen1(0x48 + ireg); // DEC ireg 823 else 824 { 825 cdb.genc2(0x81,modregrm(3,0,ireg),postinc); 826 } 827 } 828 } 829 freenode(e1); 830 } 831 832 833 /************************ 834 * Generate code for += -= &= |= ^= negass 835 */ 836 837 void cdaddass(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 838 { 839 //printf("cdaddass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs)); 840 OPER op = e.Eoper; 841 regm_t retregs = 0; 842 uint reverse = 0; 843 elem *e1 = e.EV.E1; 844 tym_t tyml = tybasic(e1.Ety); // type of lvalue 845 
int sz = _tysize[tyml]; 846 int isbyte = (sz == 1); // 1 for byte operation, else 0 847 848 // See if evaluate in XMM registers 849 if (config.fpxmmregs && tyxmmreg(tyml) && op != OPnegass && !(*pretregs & mST0)) 850 { 851 xmmopass(cdb,e,pretregs); 852 return; 853 } 854 855 if (tyfloating(tyml)) 856 { 857 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 858 { 859 if (op == OPnegass) 860 cdnegass87(cdb,e,pretregs); 861 else 862 opass87(cdb,e,pretregs); 863 } 864 else 865 { 866 if (op == OPnegass) 867 opnegassdbl(cdb,e,pretregs); 868 else 869 opassdbl(cdb,e,pretregs,op); 870 } 871 return; 872 } 873 uint opsize = (I16 && tylong(tyml) && config.target_cpu >= TARGET_80386) 874 ? CFopsize : 0; 875 uint cflags = 0; 876 regm_t forccs = *pretregs & mPSW; // return result in flags 877 regm_t forregs = *pretregs & ~mPSW; // return result in regs 878 // true if we want the result in a register 879 uint wantres = forregs || (e1.Ecount && !OTleaf(e1.Eoper)); 880 881 reg_t reg; 882 uint op1,op2,mode; 883 code cs; 884 elem *e2; 885 regm_t varregm; 886 reg_t varreg; 887 uint jop; 888 889 890 switch (op) // select instruction opcodes 891 { 892 case OPpostinc: op = OPaddass; // i++ => += 893 goto case OPaddass; 894 895 case OPaddass: op1 = 0x01; op2 = 0x11; 896 cflags = CFpsw; 897 mode = 0; break; // ADD, ADC 898 899 case OPpostdec: op = OPminass; // i-- => -= 900 goto case OPminass; 901 902 case OPminass: op1 = 0x29; op2 = 0x19; 903 cflags = CFpsw; 904 mode = 5; break; // SUB, SBC 905 906 case OPandass: op1 = op2 = 0x21; 907 mode = 4; break; // AND, AND 908 909 case OPorass: op1 = op2 = 0x09; 910 mode = 1; break; // OR , OR 911 912 case OPxorass: op1 = op2 = 0x31; 913 mode = 6; break; // XOR, XOR 914 915 case OPnegass: op1 = 0xF7; // NEG 916 break; 917 918 default: 919 assert(0); 920 } 921 op1 ^= isbyte; // bit 0 is 0 for byte operation 922 923 if (op == OPnegass) 924 { 925 getlvalue(cdb,&cs,e1,0); 926 
modEA(cdb,&cs); 927 cs.Irm |= modregrm(0,3,0); 928 cs.Iop = op1; 929 switch (_tysize[tyml]) 930 { 931 case CHARSIZE: 932 cdb.gen(&cs); 933 break; 934 935 case SHORTSIZE: 936 cdb.gen(&cs); 937 if (!I16 && *pretregs & mPSW) 938 cdb.last().Iflags |= CFopsize | CFpsw; 939 break; 940 941 case LONGSIZE: 942 if (!I16 || opsize) 943 { cdb.gen(&cs); 944 cdb.last().Iflags |= opsize; 945 break; 946 } 947 neg_2reg: 948 getlvalue_msw(&cs); 949 cdb.gen(&cs); // NEG EA+2 950 getlvalue_lsw(&cs); 951 cdb.gen(&cs); // NEG EA 952 code_orflag(cdb.last(),CFpsw); 953 cs.Iop = 0x81; 954 getlvalue_msw(&cs); 955 cs.IFL2 = FLconst; 956 cs.IEV2.Vuns = 0; 957 cdb.gen(&cs); // SBB EA+2,0 958 break; 959 960 case LLONGSIZE: 961 if (I16) 962 assert(0); // not implemented yet 963 if (I32) 964 goto neg_2reg; 965 cdb.gen(&cs); 966 break; 967 968 default: 969 assert(0); 970 } 971 forccs = 0; // flags already set by NEG 972 *pretregs &= ~mPSW; 973 } 974 else if ((e2 = e.EV.E2).Eoper == OPconst && // if rvalue is a const 975 el_signx32(e2) && 976 // Don't evaluate e2 in register if we can use an INC or DEC 977 (((sz <= REGSIZE || tyfv(tyml)) && 978 (op == OPaddass || op == OPminass) && 979 (el_allbits(e2, 1) || el_allbits(e2, -1)) 980 ) || 981 (!evalinregister(e2) 982 && tyml != TYhptr 983 ) 984 ) 985 ) 986 { 987 getlvalue(cdb,&cs,e1,0); 988 modEA(cdb,&cs); 989 cs.IFL2 = FLconst; 990 cs.IEV2.Vsize_t = e2.EV.Vint; 991 if (sz <= REGSIZE || tyfv(tyml) || opsize) 992 { 993 targ_int i = cs.IEV2.Vint; 994 995 // Handle shortcuts. Watch out for if result has 996 // to be in flags. 997 998 if (reghasvalue(isbyte ? 
BYTEREGS : ALLREGS,i,®) && i != 1 && i != -1 && 999 !opsize) 1000 { 1001 cs.Iop = op1; 1002 cs.Irm |= modregrm(0,reg & 7,0); 1003 if (I64) 1004 { if (isbyte && reg >= 4) 1005 cs.Irex |= REX; 1006 if (reg & 8) 1007 cs.Irex |= REX_R; 1008 } 1009 } 1010 else 1011 { 1012 cs.Iop = 0x81; 1013 cs.Irm |= modregrm(0,mode,0); 1014 switch (op) 1015 { 1016 case OPminass: // convert to += 1017 cs.Irm ^= modregrm(0,5,0); 1018 i = -i; 1019 cs.IEV2.Vsize_t = i; 1020 goto case OPaddass; 1021 1022 case OPaddass: 1023 if (i == 1) // INC EA 1024 goto L1; 1025 else if (i == -1) // DEC EA 1026 { cs.Irm |= modregrm(0,1,0); 1027 L1: cs.Iop = 0xFF; 1028 } 1029 break; 1030 1031 default: 1032 break; 1033 } 1034 cs.Iop ^= isbyte; // for byte operations 1035 } 1036 cs.Iflags |= opsize; 1037 if (forccs) 1038 cs.Iflags |= CFpsw; 1039 else if (!I16 && cs.Iflags & CFopsize) 1040 { 1041 switch (op) 1042 { case OPorass: 1043 case OPxorass: 1044 cs.IEV2.Vsize_t &= 0xFFFF; 1045 cs.Iflags &= ~CFopsize; // don't worry about MSW 1046 break; 1047 1048 case OPandass: 1049 cs.IEV2.Vsize_t |= ~0xFFFFL; 1050 cs.Iflags &= ~CFopsize; // don't worry about MSW 1051 break; 1052 1053 case OPminass: 1054 case OPaddass: 1055 static if (1) 1056 { 1057 if ((cs.Irm & 0xC0) == 0xC0) // EA is register 1058 cs.Iflags &= ~CFopsize; 1059 } 1060 else 1061 { 1062 if ((cs.Irm & 0xC0) == 0xC0 && // EA is register and 1063 e1.Eoper == OPind) // not a register var 1064 cs.Iflags &= ~CFopsize; 1065 } 1066 break; 1067 1068 default: 1069 assert(0); 1070 } 1071 } 1072 1073 // For scheduling purposes, we wish to replace: 1074 // OP EA 1075 // with: 1076 // MOV reg,EA 1077 // OP reg 1078 // MOV EA,reg 1079 if (forregs && sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 && 1080 (config.target_cpu == TARGET_Pentium || 1081 config.target_cpu == TARGET_PentiumMMX) && 1082 config.flags4 & CFG4speed) 1083 { 1084 regm_t sregm; 1085 code cs2; 1086 1087 // Determine which registers to use 1088 sregm = allregs & ~idxregm(&cs); 1089 if (isbyte) 1090 sregm 
&= BYTEREGS; 1091 if (sregm & forregs) 1092 sregm &= forregs; 1093 1094 allocreg(cdb,&sregm,®,tyml); // allocate register 1095 1096 cs2 = cs; 1097 cs2.Iflags &= ~CFpsw; 1098 cs2.Iop = 0x8B ^ isbyte; 1099 code_newreg(&cs2, reg); 1100 cdb.gen(&cs2); // MOV reg,EA 1101 1102 cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7); 1103 if (reg & 8) 1104 cs.Irex |= REX_B; 1105 cdb.gen(&cs); // OP reg 1106 1107 cs2.Iop ^= 2; 1108 cdb.gen(&cs2); // MOV EA,reg 1109 1110 retregs = sregm; 1111 wantres = 0; 1112 if (e1.Ecount) 1113 cssave(e1,retregs,!OTleaf(e1.Eoper)); 1114 } 1115 else 1116 { 1117 cdb.gen(&cs); 1118 cs.Iflags &= ~opsize; 1119 cs.Iflags &= ~CFpsw; 1120 if (I16 && opsize) // if DWORD operand 1121 cs.IEV1.Voffset += 2; // compensate for wantres code 1122 } 1123 } 1124 else if (sz == 2 * REGSIZE) 1125 { 1126 targ_uns msw; 1127 1128 cs.Iop = 0x81; 1129 cs.Irm |= modregrm(0,mode,0); 1130 cs.Iflags |= cflags; 1131 cdb.gen(&cs); 1132 cs.Iflags &= ~CFpsw; 1133 1134 getlvalue_msw(&cs); // point to msw 1135 msw = cast(uint)MSREG(e.EV.E2.EV.Vllong); 1136 cs.IEV2.Vuns = msw; // msw of constant 1137 switch (op) 1138 { 1139 case OPminass: 1140 cs.Irm ^= modregrm(0,6,0); // SUB => SBB 1141 break; 1142 1143 case OPaddass: 1144 cs.Irm |= modregrm(0,2,0); // ADD => ADC 1145 break; 1146 1147 default: 1148 break; 1149 } 1150 cdb.gen(&cs); 1151 } 1152 else 1153 assert(0); 1154 freenode(e.EV.E2); // don't need it anymore 1155 } 1156 else if (isregvar(e1,&varregm,&varreg) && 1157 (e2.Eoper == OPvar || e2.Eoper == OPind) && 1158 !evalinregister(e2) && 1159 sz <= REGSIZE) // deal with later 1160 { 1161 getlvalue(cdb,&cs,e2,0); 1162 freenode(e2); 1163 getregs(cdb,varregm); 1164 code_newreg(&cs, varreg); 1165 if (I64 && sz == 1 && varreg >= 4) 1166 cs.Irex |= REX; 1167 cs.Iop = op1 ^ 2; // toggle direction bit 1168 if (forccs) 1169 cs.Iflags |= CFpsw; 1170 reverse = 2; // remember we toggled it 1171 cdb.gen(&cs); 1172 retregs = 0; // to trigger a bug if we attempt to use it 1173 } 
1174 else if ((op == OPaddass || op == OPminass) && 1175 sz <= REGSIZE && 1176 !e2.Ecount && 1177 ((jop = jmpopcode(e2)) == JC || jop == JNC || 1178 (OTconv(e2.Eoper) && !e2.EV.E1.Ecount && ((jop = jmpopcode(e2.EV.E1)) == JC || jop == JNC))) 1179 ) 1180 { 1181 /* e1 += (x < y) ADC EA,0 1182 * e1 -= (x < y) SBB EA,0 1183 * e1 += (x >= y) SBB EA,-1 1184 * e1 -= (x >= y) ADC EA,-1 1185 */ 1186 getlvalue(cdb,&cs,e1,0); // get lvalue 1187 modEA(cdb,&cs); 1188 regm_t keepmsk = idxregm(&cs); 1189 retregs = mPSW; 1190 if (OTconv(e2.Eoper)) 1191 { 1192 scodelem(cdb,e2.EV.E1,&retregs,keepmsk,true); 1193 freenode(e2); 1194 } 1195 else 1196 scodelem(cdb,e2,&retregs,keepmsk,true); 1197 cs.Iop = 0x81 ^ isbyte; // ADC EA,imm16/32 1198 uint regop = 2; // ADC 1199 if ((op == OPaddass) ^ (jop == JC)) 1200 regop = 3; // SBB 1201 code_newreg(&cs,regop); 1202 cs.Iflags |= opsize; 1203 if (forccs) 1204 cs.Iflags |= CFpsw; 1205 cs.IFL2 = FLconst; 1206 cs.IEV2.Vsize_t = (jop == JC) ? 0 : ~cast(targ_size_t)0; 1207 cdb.gen(&cs); 1208 retregs = 0; // to trigger a bug if we attempt to use it 1209 } 1210 else // evaluate e2 into register 1211 { 1212 retregs = (isbyte) ? 
BYTEREGS : ALLREGS; // pick working reg 1213 if (tyml == TYhptr) 1214 retregs &= ~mCX; // need CX for shift count 1215 scodelem(cdb,e.EV.E2,&retregs,0,true); // get rvalue 1216 getlvalue(cdb,&cs,e1,retregs); // get lvalue 1217 modEA(cdb,&cs); 1218 cs.Iop = op1; 1219 if (sz <= REGSIZE || tyfv(tyml)) 1220 { 1221 reg = findreg(retregs); 1222 code_newreg(&cs, reg); // OP1 EA,reg 1223 if (sz == 1 && reg >= 4 && I64) 1224 cs.Irex |= REX; 1225 if (forccs) 1226 cs.Iflags |= CFpsw; 1227 } 1228 else if (tyml == TYhptr) 1229 { 1230 uint mreg = findregmsw(retregs); 1231 uint lreg = findreglsw(retregs); 1232 getregs(cdb,retregs | mCX); 1233 1234 // If h -= l, convert to h += -l 1235 if (e.Eoper == OPminass) 1236 { 1237 cdb.gen2(0xF7,modregrm(3,3,mreg)); // NEG mreg 1238 cdb.gen2(0xF7,modregrm(3,3,lreg)); // NEG lreg 1239 code_orflag(cdb.last(),CFpsw); 1240 cdb.genc2(0x81,modregrm(3,3,mreg),0); // SBB mreg,0 1241 } 1242 cs.Iop = 0x01; 1243 cs.Irm |= modregrm(0,lreg,0); 1244 cdb.gen(&cs); // ADD EA,lreg 1245 code_orflag(cdb.last(),CFpsw); 1246 cdb.genc2(0x81,modregrm(3,2,mreg),0); // ADC mreg,0 1247 genshift(cdb); // MOV CX,offset __AHSHIFT 1248 cdb.gen2(0xD3,modregrm(3,4,mreg)); // SHL mreg,CL 1249 NEWREG(cs.Irm,mreg); // ADD EA+2,mreg 1250 getlvalue_msw(&cs); 1251 } 1252 else if (sz == 2 * REGSIZE) 1253 { 1254 cs.Irm |= modregrm(0,findreglsw(retregs),0); 1255 cdb.gen(&cs); // OP1 EA,reg+1 1256 code_orflag(cdb.last(),cflags); 1257 cs.Iop = op2; 1258 NEWREG(cs.Irm,findregmsw(retregs)); // OP2 EA+1,reg 1259 getlvalue_msw(&cs); 1260 } 1261 else 1262 assert(0); 1263 cdb.gen(&cs); 1264 retregs = 0; // to trigger a bug if we attempt to use it 1265 } 1266 1267 // See if we need to reload result into a register. 1268 // Need result in registers in case we have a 32 bit 1269 // result and we want the flags as a result. 
1270 if (wantres || (sz > REGSIZE && forccs)) 1271 { 1272 if (sz <= REGSIZE) 1273 { 1274 regm_t possregs; 1275 1276 possregs = ALLREGS; 1277 if (isbyte) 1278 possregs = BYTEREGS; 1279 retregs = forregs & possregs; 1280 if (!retregs) 1281 retregs = possregs; 1282 1283 // If reg field is destination 1284 if (cs.Iop & 2 && cs.Iop < 0x40 && (cs.Iop & 7) <= 5) 1285 { 1286 reg = (cs.Irm >> 3) & 7; 1287 if (cs.Irex & REX_R) 1288 reg |= 8; 1289 retregs = mask(reg); 1290 allocreg(cdb,&retregs,®,tyml); 1291 } 1292 // If lvalue is a register, just use that register 1293 else if ((cs.Irm & 0xC0) == 0xC0) 1294 { 1295 reg = cs.Irm & 7; 1296 if (cs.Irex & REX_B) 1297 reg |= 8; 1298 retregs = mask(reg); 1299 allocreg(cdb,&retregs,®,tyml); 1300 } 1301 else 1302 { 1303 allocreg(cdb,&retregs,®,tyml); 1304 cs.Iop = 0x8B ^ isbyte ^ reverse; 1305 code_newreg(&cs, reg); 1306 if (I64 && isbyte && reg >= 4) 1307 cs.Irex |= REX_W; 1308 cdb.gen(&cs); // MOV reg,EA 1309 } 1310 } 1311 else if (tyfv(tyml) || tyml == TYhptr) 1312 { 1313 regm_t idxregs; 1314 1315 if (tyml == TYhptr) 1316 getlvalue_lsw(&cs); 1317 idxregs = idxregm(&cs); 1318 retregs = forregs & ~idxregs; 1319 if (!(retregs & IDXREGS)) 1320 retregs |= IDXREGS & ~idxregs; 1321 if (!(retregs & mMSW)) 1322 retregs |= mMSW & ALLREGS; 1323 allocreg(cdb,&retregs,®,tyml); 1324 NEWREG(cs.Irm,findreglsw(retregs)); 1325 if (retregs & mES) // if want ES loaded 1326 { 1327 cs.Iop = 0xC4; 1328 cdb.gen(&cs); // LES lreg,EA 1329 } 1330 else 1331 { 1332 cs.Iop = 0x8B; 1333 cdb.gen(&cs); // MOV lreg,EA 1334 getlvalue_msw(&cs); 1335 if (I32) 1336 cs.Iflags |= CFopsize; 1337 NEWREG(cs.Irm,reg); 1338 cdb.gen(&cs); // MOV mreg,EA+2 1339 } 1340 } 1341 else if (sz == 2 * REGSIZE) 1342 { 1343 regm_t idx = idxregm(&cs); 1344 retregs = forregs; 1345 if (!retregs) 1346 retregs = ALLREGS; 1347 allocreg(cdb,&retregs,®,tyml); 1348 cs.Iop = 0x8B; 1349 NEWREG(cs.Irm,reg); 1350 1351 code csl = cs; 1352 NEWREG(csl.Irm,findreglsw(retregs)); 1353 getlvalue_lsw(&csl); 
// If reg is one of the EA's index registers, load the other half first (presumably so the address stays usable)
            if (mask(reg) & idx)
            {
                cdb.gen(&csl);                  // MOV reg+1,EA
                cdb.gen(&cs);                   // MOV reg,EA+2
            }
            else
            {
                cdb.gen(&cs);                   // MOV reg,EA+2
                cdb.gen(&csl);                  // MOV reg+1,EA
            }
        }
        else
            assert(0);
        if (e1.Ecount)                          // if we gen a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
    }
    freenode(e1);
    if (sz <= REGSIZE)
        *pretregs &= ~mPSW;                     // flags are already set
    fixresult(cdb,e,retregs,pretregs);
}

/********************************
 * Generate code for *= /= %=
 * (OPmulass, OPdivass, OPmodass).
 *
 * Dispatches on the type of the lvalue:
 *  $(UL
 *  $(LI XMM-eligible types go to xmmopass(), except for %= or when the
 *       result is wanted in ST0)
 *  $(LI other floating types go to opass87() or opassdbl(), selected by target)
 *  $(LI operands no wider than REGSIZE are done inline with [I]MUL / [I]DIV;
 *       signed /= and %= by a constant power of 2 use a shift/mask sequence
 *       when CFG4speed is set)
 *  $(LI operands of 2 * REGSIZE use an inline multiply sequence on
 *       PentiumPro and later, otherwise a runtime library call)
 *  )
 * Any other operand size asserts.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the op= expression; e.EV.E1 is the lvalue, e.EV.E2 the rvalue
 *      pretregs = mask of where the result is wanted; handed to fixresult()
 *                 (or to the routine the work is delegated to)
 */

void cdmulass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    regm_t retregs;
    reg_t resreg;
    reg_t reg;
    uint opr,lib,isbyte;

    //printf("cdmulass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;                          // OPxxxx

    tym_t tyml = tybasic(e1.Ety);               // type of lvalue
    char uns = tyuns(tyml) || tyuns(e2.Ety);
    uint sz = _tysize[tyml];

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;                      // 64 bit operands

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    if (tyfloating(tyml))
    {
        static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
        {
            opass87(cdb,e,pretregs);
        }
        else
        {
            opassdbl(cdb,e,pretregs,op);
        }
        return;
    }

    if (sz <= REGSIZE)                          // if word or byte
    {
        isbyte = (sz == 1);                     // 1 for byte operation
        resreg = AX;                            // result register for * or /
        if (uns)                                // if uint operation
            opr = 4;                            // MUL
        else                                    // else signed
            opr = 5;                            // IMUL
        if (op != OPmulass)                     // if /= or %=
        {
            opr += 2;                           // MUL => DIV, IMUL => IDIV
            if (op == OPmodass)
                resreg = DX;                    // remainder is in DX
        }
        if (op == OPmulass)                     // if multiply
        {
            if (config.target_cpu >= TARGET_80286 &&
                e2.Eoper == OPconst && !isbyte)
            {
                targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
                // The immediate is stored as a 32 bit int (Vint below); a 64 bit
                // factor that does not round-trip through int uses the register form
                if (I64 && sz == 8 && e2factor != cast(int)e2factor)
                    goto L1;
                freenode(e2);
                getlvalue(cdb,&cs,e1,0);        // get EA
                regm_t idxregs = idxregm(&cs);
                retregs = *pretregs & (ALLREGS | mBP) & ~idxregs;
                if (!retregs)
                    retregs = ALLREGS & ~idxregs;
                allocreg(cdb,&retregs,&resreg,tyml);
                cs.Iop = 0x69;                  // IMUL reg,EA,e2value
                cs.IFL2 = FLconst;
                cs.IEV2.Vint = cast(int)e2factor;
                opr = resreg;                   // reg field is the destination register
            }
            else if (!I16 && !isbyte)
            {
             L1:
                retregs = *pretregs & (ALLREGS | mBP);
                if (!retregs)
                    retregs = ALLREGS;
                codelem(cdb,e2,&retregs,false); // load rvalue in reg
                getlvalue(cdb,&cs,e1,retregs);  // get EA
                getregs(cdb,retregs);           // destroy these regs
                cs.Iop = 0x0FAF;                // IMUL resreg,EA
                resreg = findreg(retregs);
                opr = resreg;                   // reg field is the destination register
            }
            else
            {
                retregs = mAX;
                codelem(cdb,e2,&retregs,false);         // load rvalue in AX
                getlvalue(cdb,&cs,e1,mAX);              // get EA
                getregs(cdb,isbyte ? mAX : mAX | mDX);  // destroy these regs
                cs.Iop = 0xF7 ^ isbyte;                 // [I]MUL EA
            }
            code_newreg(&cs,opr);               // opr is either the /digit (4..7) or the destination reg
            cdb.gen(&cs);
        }
        else // /= or %=
        {
            targ_size_t e2factor;
            int pow2;

            assert(!isbyte);                    // should never happen
            assert(I16 || sz != SHORTSIZE);
            if (config.flags4 & CFG4speed &&
                e2.Eoper == OPconst && !uns &&
                (sz == REGSIZE || (I64 && sz == 4)) &&
                (pow2 = ispow2(e2factor = cast(targ_size_t)el_tolong(e2))) != -1 &&
                e2factor == cast(int)e2factor &&
                !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass)
               )
            {
                // Signed divide or modulo by power of 2
                getlvalue(cdb,&cs,e1,mAX | mDX);
                cs.Iop = 0x8B;
                code_newreg(&cs, AX);
                cdb.gen(&cs);                   // MOV AX,EA
                freenode(e2);
                getregs(cdb,mAX | mDX);         // trash these regs
                cdb.gen1(0x99);                 // CWD
                code_orrex(cdb.last(), rex);
                // From here DX holds the sign extension of AX:
                // 0 for a non-negative dividend, all ones for a negative one.
                if (pow2 == 1)
                {
                    if (op == OPdivass)
                    {
                        // Add 1 to a negative dividend so the shift rounds toward 0
                        cdb.gen2(0x2B,grex | modregrm(3,AX,DX));        // SUB AX,DX
                        cdb.gen2(0xD1,grex | modregrm(3,7,AX));         // SAR AX,1
                        resreg = AX;
                    }
                    else // OPmod
                    {
                        cdb.gen2(0x33,grex | modregrm(3,AX,DX));        // XOR AX,DX
                        cdb.genc2(0x81,grex | modregrm(3,4,AX),1);      // AND AX,1
                        cdb.gen2(0x03,grex | modregrm(3,DX,AX));        // ADD DX,AX
                        resreg = DX;
                    }
                }
                else
                {
                    assert(pow2 < 32);
                    targ_ulong m = (1 << pow2) - 1;     // mask of the low pow2 bits
                    if (op == OPdivass)
                    {
                        // Bias a negative dividend by m so the shift rounds toward 0
                        cdb.genc2(0x81,grex | modregrm(3,4,DX),m);      // AND DX,m
                        cdb.gen2(0x03,grex | modregrm(3,AX,DX));        // ADD AX,DX
                        // Be careful not to generate this for 8088
                        assert(config.target_cpu >= TARGET_80286);
                        cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2);   // SAR AX,pow2
                        resreg = AX;
                    }
                    else // OPmodass
                    {
                        // XOR/SUB with the sign mask negates a negative value:
                        // take the magnitude, mask it, then restore the sign
                        cdb.gen2(0x33,grex | modregrm(3,AX,DX));        // XOR AX,DX
                        cdb.gen2(0x2B,grex | modregrm(3,AX,DX));        // SUB AX,DX
                        cdb.genc2(0x81,grex | modregrm(3,4,AX),m);      // AND AX,m
                        cdb.gen2(0x33,grex | modregrm(3,AX,DX));        // XOR AX,DX
                        cdb.gen2(0x2B,grex | modregrm(3,AX,DX));        // SUB AX,DX
                        resreg = AX;
                    }
                }
            }
            else
            {
                retregs = ALLREGS & ~(mAX|mDX);         // DX gets sign extension
                codelem(cdb,e2,&retregs,false);         // load rvalue in retregs
                reg = findreg(retregs);
                getlvalue(cdb,&cs,e1,mAX | mDX | retregs);      // get EA
                getregs(cdb,mAX | mDX);                 // destroy these regs
                cs.Irm |= modregrm(0,AX,0);
                cs.Iop = 0x8B;
                cdb.gen(&cs);                           // MOV AX,EA
                if (uns)                                // if uint
                    movregconst(cdb,DX,0,0);            // CLR DX
                else                                    // else signed
                {
                    cdb.gen1(0x99);                     // CWD
                    code_orrex(cdb.last(),rex);
                }
                getregs(cdb,mDX | mAX);                 // DX and AX will be destroyed
                genregs(cdb,0xF7,opr,reg);              // OPR reg
                code_orrex(cdb.last(),rex);
            }
        }
        // Store the result back into the lvalue
        cs.Iop = 0x89 ^ isbyte;
        code_newreg(&cs,resreg);
        cdb.gen(&cs);                           // MOV EA,resreg
        if (e1.Ecount)                          // if we gen a CSE
            cssave(e1,mask(resreg),!OTleaf(e1.Eoper));
        freenode(e1);
        fixresult(cdb,e,mask(resreg),pretregs);
        return;
    }
    else if (sz == 2 * REGSIZE)
    {
        lib = CLIB.lmul;
        if (op == OPdivass || op == OPmodass)
        {
            lib = (uns) ? CLIB.uldiv : CLIB.ldiv;
            if (op == OPmodass)
                lib++;                          // the modulo entry follows the divide entry
        }
        retregs = mCX | mBX;
        codelem(cdb,e2,&retregs,false);         // rvalue in CX:BX
        getlvalue(cdb,&cs,e1,mDX|mAX | mCX|mBX);
        getregs(cdb,mDX | mAX);
        cs.Iop = 0x8B;
        cdb.gen(&cs);                           // MOV AX,EA
        getlvalue_msw(&cs);
        cs.Irm |= modregrm(0,DX,0);
        cdb.gen(&cs);                           // MOV DX,EA+2
        getlvalue_lsw(&cs);
        retregs = mDX | mAX;
        if (config.target_cpu >= TARGET_PentiumPro && op == OPmulass)
        {
            /*  IMUL    ECX,EAX
                IMUL    EDX,EBX
                ADD     ECX,EDX
                MUL     EBX
                ADD     EDX,ECX
             */
            getregs(cdb,mAX|mDX|mCX);
            cdb.gen2(0x0FAF,modregrm(3,CX,AX));
            cdb.gen2(0x0FAF,modregrm(3,DX,BX));
            cdb.gen2(0x03,modregrm(3,CX,DX));
            cdb.gen2(0xF7,modregrm(3,4,BX));
            cdb.gen2(0x03,modregrm(3,DX,CX));
        }
        else
        {
            if (op == OPmodass)
                retregs = mBX | mCX;            // the %= result is taken from CX:BX
            callclib(cdb,e,lib,&retregs,idxregm(&cs));
        }
        // Store both halves of the result back into the lvalue
        reg = findreglsw(retregs);
        cs.Iop = 0x89;
        NEWREG(cs.Irm,reg);
        cdb.gen(&cs);                           // MOV EA,lsreg
        reg = findregmsw(retregs);
        NEWREG(cs.Irm,reg);
        getlvalue_msw(&cs);
        cdb.gen(&cs);                           // MOV EA+2,msreg
        if (e1.Ecount)                          // if we gen a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
        freenode(e1);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else
    {
        assert(0);
    }
}


/********************************
 * Generate code for <<= and >>=
 * (OPshlass, OPshrass, OPashrass).
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the shift-assign expression; e.EV.E1 is the lvalue,
 *          e.EV.E2 the shift count (no wider than REGSIZE)
 *      pretregs = on entry, mask of where the result is wanted;
 *                 on exit, set to where the result was left
 */

void cdshass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    regm_t retregs;
    uint op1,op2;
    reg_t reg;

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;

    tym_t tyml = tybasic(e1.Ety);               // type of lvalue
    uint sz = _tysize[tyml];
    uint isbyte = tybyte(e.Ety) != 0;           // 1 for byte operations
    tym_t tym = tybasic(e.Ety);                 // type of result
    OPER oper = e.Eoper;
    assert(tysize(e2.Ety) <= REGSIZE);

    uint rex = (I64 && sz == 8) ?
REX_W : 0;

    // if our lvalue is a cse, make sure we evaluate for result in register
    if (e1.Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,&retregs,&reg))
        *pretregs |= ALLREGS;

version (SCPP)
{
    // Do this until the rest of the compiler does OPshr/OPashr correctly
    if (oper == OPshrass)
        oper = tyuns(tyml) ? OPshrass : OPashrass;
}

    // Select opcodes. op2 is used for msw for long shifts.
    // op1/op2 end up in the reg field of the shift instruction's modregrm byte.

    switch (oper)
    {
        case OPshlass:
            op1 = 4;                    // SHL
            op2 = 2;                    // RCL
            break;

        case OPshrass:
            op1 = 5;                    // SHR
            op2 = 3;                    // RCR
            break;

        case OPashrass:
            op1 = 7;                    // SAR
            op2 = 3;                    // RCR
            break;

        default:
            assert(0);
    }


    // Pick the shift opcode form (v) and how many times it is emitted (loopcnt)
    uint v = 0xD3;                      // for SHIFT xx,CL cases
    uint loopcnt = 1;
    uint conste2 = false;
    uint shiftcnt = 0;                  // avoid "use before initialized" warnings
    if (e2.Eoper == OPconst)
    {
        conste2 = true;                 // e2 is a constant
        shiftcnt = e2.EV.Vint;          // byte ordering of host
        if (config.target_cpu >= TARGET_80286 &&
            sz <= REGSIZE &&
            shiftcnt != 1)
            v = 0xC1;                   // SHIFT xx,shiftcnt
        else if (shiftcnt <= 3)
        {
            loopcnt = shiftcnt;         // repeat the shift-by-1 form shiftcnt times
            v = 0xD1;                   // SHIFT xx,1
        }
    }

    if (v == 0xD3)                      // if COUNT == CL
    {
        retregs = mCX;
        codelem(cdb,e2,&retregs,false);
    }
    else
        freenode(e2);
    getlvalue(cdb,&cs,e1,mCX);          // get lvalue, preserve CX
    modEA(cdb,&cs);                     // check for modifying register

    // Shift the lvalue in place when no register result is needed,
    // and always for operands wider than a register.
    if (*pretregs == 0 ||               // if don't return result
        (*pretregs == mPSW && conste2 && _tysize[tym] <= REGSIZE) ||
        sz > REGSIZE
       )
    {
        retregs = 0;                    // value not returned in a register
        cs.Iop = v ^ isbyte;
        while (loopcnt--)
        {
            NEWREG(cs.Irm,op1);         // make sure op1 is first
            if (sz <= REGSIZE)
            {
                if (conste2)
                {
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vint = shiftcnt;
                }
                cdb.gen(&cs);           // SHIFT EA,[CL|1]
                // Only the last shift emitted needs to leave the flags behind
                if (*pretregs & mPSW && !loopcnt && conste2)
                    code_orflag(cdb.last(),CFpsw);
            }
            else // TYlong
            {
                // One bit per pass: shift one half with op1, then carry
                // into the other half with op2 (RCL/RCR)
                cs.Iop = 0xD1;          // plain shift
                code *ce = gennop(null);                        // ce: NOP
                if (v == 0xD3)
                {
                    getregs(cdb,mCX);
                    if (!conste2)
                    {
                        assert(loopcnt == 0);
                        genjmp(cdb,JCXZ,FLcode,cast(block *) ce);   // JCXZ ce
                    }
                }
                code *cg;               // first instruction of the pair, the LOOP target
                if (oper == OPshlass)
                {
                    // left shift: low half first, then the high half
                    cdb.gen(&cs);               // cg: SHIFT EA
                    cg = cdb.last();
                    code_orflag(cg,CFpsw);
                    getlvalue_msw(&cs);
                    NEWREG(cs.Irm,op2);
                    cdb.gen(&cs);               // SHIFT EA
                    getlvalue_lsw(&cs);
                }
                else
                {
                    // right shift: high half first, then the low half
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);
                    cg = cdb.last();
                    code_orflag(cg,CFpsw);
                    NEWREG(cs.Irm,op2);
                    getlvalue_lsw(&cs);
                    cdb.gen(&cs);
                }
                if (v == 0xD3)                  // if building a loop
                {
                    genjmp(cdb,LOOP,FLcode,cast(block *) cg);   // LOOP cg
                    regimmed_set(CX,0);         // note that now CX == 0
                }
                cdb.append(ce);
            }
        }

        // If we want the result, we must load it from the EA
        // into a register.

        if (sz == 2 * REGSIZE && *pretregs)
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (retregs)
            {
                retregs &= ~idxregm(&cs);
                allocreg(cdb,&retregs,&reg,tym);
                cs.Iop = 0x8B;

                // be careful not to trash any index regs
                // do MSW first (which can't be an index reg)
                getlvalue_msw(&cs);
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
                reg = findreglsw(retregs);
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);
                if (*pretregs & mPSW)
                    tstresult(cdb,retregs,tyml,true);
            }
            else        // flags only
            {
                retregs = ALLREGS & ~idxregm(&cs);
                allocreg(cdb,&retregs,&reg,TYint);
                cs.Iop = 0x8B;
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);           // MOV reg,EA
                cs.Iop = 0x0B;          // OR reg,EA+2
                cs.Iflags |= CFpsw;
                getlvalue_msw(&cs);
                cdb.gen(&cs);
            }
        }
    }
    else                                // else must evaluate in register
    {
        if (sz <= REGSIZE)
        {
            // CX is excluded because it may hold the shift count
            regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs);
            if (isbyte)
                possregs &= BYTEREGS;
            retregs = *pretregs & possregs;
            if (retregs == 0)
                retregs = possregs;
            allocreg(cdb,&retregs,&reg,tym);
            cs.Iop = 0x8B ^ isbyte;
            code_newreg(&cs, reg);
            if (isbyte && I64 && (reg >= 4))
                cs.Irex |= REX;
            cdb.gen(&cs);                       // MOV reg,EA
            if (!I16)
            {
                assert(!isbyte || (mask(reg) & BYTEREGS));
                cdb.genc2(v ^ isbyte,modregrmx(3,op1,reg),shiftcnt);
                if (isbyte && I64 && (reg >= 4))
                    cdb.last().Irex |= REX;
                code_orrex(cdb.last(), rex);
                // We can do a 32 bit shift on a 16 bit operand if
                // it's a left shift and we're not concerned about
                // the flags. Remember that flags are not set if
                // a shift of 0 occurs.
                if (_tysize[tym] == SHORTSIZE &&
                    (oper == OPshrass || oper == OPashrass ||
                     (*pretregs & mPSW && conste2)))
                     cdb.last().Iflags |= CFopsize;     // 16 bit operand
            }
            else
            {
                while (loopcnt--)
                {   // Generate shift instructions.
                    cdb.genc2(v ^ isbyte,modregrm(3,op1,reg),shiftcnt);
                }
            }
            if (*pretregs & mPSW && conste2)
            {
                assert(shiftcnt);
                *pretregs &= ~mPSW;             // result is already in flags
                code_orflag(cdb.last(),CFpsw);
            }

            cs.Iop = 0x89 ^ isbyte;
            if (isbyte && I64 && (reg >= 4))
                cs.Irex |= REX;
            cdb.gen(&cs);                       // MOV EA,reg

            // If result is not in correct register
            fixresult(cdb,e,retregs,pretregs);
            retregs = *pretregs;
        }
        else
            assert(0);
    }
    if (e1.Ecount && !(retregs & regcon.mvar))  // if lvalue is a CSE
        cssave(e1,retregs,!OTleaf(e1.Eoper));
    freenode(e1);
    *pretregs = retregs;
}


/**********************************
 * Generate code for compares.
 * Handles lt,gt,le,ge,eqeq,ne for all data types.
 *
 * An extra input arrives through the global cdcmp_flag, which is read
 * and reset to 0 on entry.
 *
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      e = the relational expression; e.EV.E1 and e.EV.E2 are the operands
 *      pretregs = mask of where the result is wanted (flags and/or a
 *                 register); if 0, both operands are still evaluated but
 *                 no compare is generated
 */

void cdcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs,rretregs;
    reg_t reg,rreg;
    int fl;

    //printf("cdcmp(e = %p, pretregs = %s)\n",e,regm_str(*pretregs));
    // Collect extra parameter. This is pretty ugly...
// The global flag is read once and immediately reset to 0.
    int flag = cdcmp_flag;
    cdcmp_flag = 0;

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    if (*pretregs == 0)                 // if don't want result
    {
        codelem(cdb,e1,pretregs,false);
        *pretregs = 0;                  // in case e1 changed it
        codelem(cdb,e2,pretregs,false);
        return;
    }

    uint jop = jmpopcode(e);            // must be computed before
                                        // leaves are free'd
    uint reverse = 0;                   // XORed into CMP opcodes (0 or 2) to swap operand direction

    OPER op = e.Eoper;
    assert(OTrel(op));
    bool eqorne = (op == OPeqeq) || (op == OPne);

    tym_t tym = tybasic(e1.Ety);
    uint sz = _tysize[tym];
    uint isbyte = sz == 1;

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;              // 64 bit operands

    code cs;
    code *ce;                           // NOP that the MSW-differs jump of a two-word compare targets
    if (tyfloating(tym))                // if floating operation
    {
        if (config.fpxmmregs)
        {
            retregs = mPSW;
            if (tyxmmreg(tym))
                orthxmm(cdb,e,&retregs);
            else
                orth87(cdb,e,&retregs);
        }
        else if (config.inline8087)
        {   retregs = mPSW;
            orth87(cdb,e,&retregs);
        }
        else
        {
static if (TARGET_WINDOS)
{
            int clib;

            retregs = 0;                /* skip result for now          */
            if (iffalse(e2))            /* second operand is constant 0 */
            {
                assert(!eqorne);        /* should be OPbool or OPnot    */
                if (tym == TYfloat)
                {
                    retregs = FLOATREGS;
                    clib = CLIB.ftst0;
                }
                else
                {
                    retregs = DOUBLEREGS;
                    clib = CLIB.dtst0;
                }
                if (rel_exception(op))
                    clib += CLIB.dtst0exc - CLIB.dtst0;
                codelem(cdb,e1,&retregs,false);
                retregs = 0;
                callclib(cdb,e,clib,&retregs,0);
                freenode(e2);
            }
            else
            {
                clib = CLIB.dcmp;
                if (rel_exception(op))
                    clib += CLIB.dcmpexc - CLIB.dcmp;
                opdouble(cdb,e,&retregs,clib);
            }
}
else
{
            assert(0);
}
        }
        goto L3;
    }

    /* If it's a signed comparison of longs, we have to call a library  */
    /* routine, because we don't know the target of the signed branch   */
    /* (have to set up flags so that jmpopcode() will do it right)      */
    if (!eqorne &&
        (I16 && tym == TYlong  && tybasic(e2.Ety) == TYlong ||
         I32 && tym == TYllong && tybasic(e2.Ety) == TYllong)
       )
    {
        assert(jop != JC && jop != JNC);
        retregs = mDX | mAX;
        codelem(cdb,e1,&retregs,false);
        retregs = mCX | mBX;
        scodelem(cdb,e2,&retregs,mDX | mAX,false);

        if (I16)
        {
            retregs = 0;
            callclib(cdb,e,CLIB.lcmp,&retregs,0);       // gross, but it works
        }
        else
        {
            /* Generate:
             *      CMP  EDX,ECX
             *      JNE  C1
             *      XOR  EDX,EDX
             *      CMP  EAX,EBX
             *      JZ   C1
             *      JA   C3
             *      DEC  EDX
             *      JMP  C1
             * C3:  INC  EDX
             * C1:
             */
             getregs(cdb,mDX);
             genregs(cdb,0x39,CX,DX);                   // CMP EDX,ECX
             code *c1 = gennop(null);
             genjmp(cdb,JNE,FLcode,cast(block *)c1);    // JNE C1
             movregconst(cdb,DX,0,0);                   // XOR EDX,EDX
             genregs(cdb,0x39,BX,AX);                   // CMP EAX,EBX
             genjmp(cdb,JE,FLcode,cast(block *)c1);     // JZ C1
             code *c3 = gen1(null,0x40 + DX);           // INC EDX
             genjmp(cdb,JA,FLcode,cast(block *)c3);     // JA C3
             cdb.gen1(0x48 + DX);                       // DEC EDX
             genjmp(cdb,JMPS,FLcode,cast(block *)c1);   // JMP C1
             cdb.append(c3);
             cdb.append(c1);
             getregs(cdb,mDX);
             retregs = mPSW;
        }
        goto L3;
    }

    /* See if we should reverse the comparison, so a JA => JC, and JBE => JNC
     * (This is already reflected in the jop)
     */
    if ((jop == JC || jop == JNC) &&
        (op == OPgt || op == OPle) &&
        (tyuns(tym) || tyuns(e2.Ety))
       )
    {   // jmpopcode() sez comparison should be reversed
        assert(e2.Eoper != OPconst && e2.Eoper != OPrelconst);
        reverse ^= 2;
    }

    /* See if we should swap operands     */
    if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
    {
        e1 = e.EV.E2;
        e2 = e.EV.E1;
        reverse ^= 2;
    }

    retregs = allregs;
    if (isbyte)
        retregs = BYTEREGS;

    ce = null;
    cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
    cs.Irex = cast(ubyte)rex;
    if (sz > REGSIZE)
        ce = gennop(ce);

    // The form of the right operand selects the compare strategy
    switch (e2.Eoper)
    {
        default:
        L2:
            // General case: both operands in registers
            scodelem(cdb,e1,&retregs,0,true);           // compute left leaf
            rretregs = allregs & ~retregs;
            if (isbyte)
                rretregs &= BYTEREGS;
            scodelem(cdb,e2,&rretregs,retregs,true);    // get right leaf
            if (sz <= REGSIZE)                          // CMP reg,rreg
            {
                reg = findreg(retregs);                 // get reg that e1 is in
                rreg = findreg(rretregs);
                genregs(cdb,0x3B ^ isbyte ^ reverse,reg,rreg);
                code_orrex(cdb.last(), rex);
                if (!I16 && sz == SHORTSIZE)
                    cdb.last().Iflags |= CFopsize;      // compare only 16 bits
                if (I64 && isbyte && (reg >= 4 || rreg >= 4))
                    cdb.last().Irex |= REX;             // address byte registers
            }
            else
            {
                assert(sz <= 2 * REGSIZE);

                // Compare MSW, if they're equal then compare the LSW
                reg = findregmsw(retregs);
                rreg = findregmsw(rretregs);
                genregs(cdb,0x3B ^ reverse,reg,rreg);   // CMP reg,rreg
                if (I32 && sz == 6)
                    cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
                else if (I64)
                    code_orrex(cdb.last(), REX_W);
                genjmp(cdb,JNE,FLcode,cast(block *) ce);        // JNE nop

                reg = findreglsw(retregs);
                rreg = findreglsw(rretregs);
                genregs(cdb,0x3B ^ reverse,reg,rreg);   // CMP reg,rreg
                if (I64)
                    code_orrex(cdb.last(), REX_W);
            }
            break;

        case OPrelconst:
            if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64))
                goto L2;
            fl = el_fl(e2);
            switch (fl)
            {
                case FLfunc:
                    fl = FLextern;              // so it won't be self-relative
                    break;

                case FLdata:
                case FLudata:
                case FLextern:
                    if (sz > REGSIZE)           // compare against DS, not DGROUP
                        goto L2;
                    break;

                case FLfardata:
                    break;

                default:
                    goto L2;
            }
            cs.IFL2 = cast(ubyte)fl;
            cs.IEV2.Vsym = e2.EV.Vsym;
            if (sz > REGSIZE)
            {
                cs.Iflags |= CFseg;
                cs.IEV2.Voffset = 0;
            }
            else
            {
                cs.Iflags |= CFoff;
                cs.IEV2.Voffset = e2.EV.Voffset;
            }
            goto L4;

        case OPconst:
            // If compare against 0
            if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) &&
                isregvar(e1,&retregs,&reg)
               )
            {   // Just do a TEST instruction
                genregs(cdb,0x85 ^ isbyte,reg,reg);     // TEST reg,reg
                cdb.last().Iflags |= (cs.Iflags & CFopsize) | CFpsw;
                code_orrex(cdb.last(), rex);
                if (I64 && isbyte && reg >= 4)
                    cdb.last().Irex |= REX;             // address byte registers
                retregs = mPSW;
                break;
            }

            // Signed compare against 0 with the result wanted in a register:
            // derive the 0/1 result from the sign bit, without a branch
            if (!tyuns(tym) && !tyuns(e2.Ety) &&
                !boolres(e2) && !(*pretregs & mPSW) &&
                (sz == REGSIZE || (I64 && sz == 4)) &&
                (!I16 || op == OPlt || op == OPge))
            {
                assert(*pretregs & (allregs));
                codelem(cdb,e1,pretregs,false);
                reg = findreg(*pretregs);
                getregs(cdb,mask(reg));
                switch (op)
                {
                    case OPle:
                        cdb.genc2(0x81,grex | modregrmx(3,0,reg),cast(uint)-1);   // ADD reg,-1
                        code_orflag(cdb.last(), CFpsw);
                        cdb.genc2(0x81,grex | modregrmx(3,2,reg),0);              // ADC reg,0
                        goto oplt;

                    case OPgt:
                        cdb.gen2(0xF7,grex | modregrmx(3,3,reg));                 // NEG reg
                            /* Flips the sign bit unless the value is 0 or int.min.
                            Also sets the carry bit when the value is not 0. */
                        code_orflag(cdb.last(), CFpsw);
                        cdb.genc2(0x81,grex | modregrmx(3,3,reg),0);              // SBB reg,0
                            /* Subtracts the carry bit. This turns int.min into
                            int.max, flipping the sign bit.
                            For other negative and positive values, subtracting 1
                            doesn't affect the sign bit.
                            For 0, the carry bit is not set, so this does nothing
                            and the sign bit is not affected. */
                        goto oplt;

                    case OPlt:
                    oplt:
                        // Get the sign bit, i.e. 1 if the value is negative.
                        if (!I16)
                            cdb.genc2(0xC1,grex | modregrmx(3,5,reg),sz * 8 - 1); // SHR reg,31
                        else
                        {   /* 8088-286 do not have a barrel shifter, so use this
                               faster sequence
                             */
                            genregs(cdb,0xD1,0,reg);                // ROL reg,1
                            reg_t regi;
                            if (reghasvalue(allregs,1,&regi))
                                genregs(cdb,0x23,reg,regi);         // AND reg,regi
                            else
                                cdb.genc2(0x81,modregrm(3,4,reg),1); // AND reg,1
                        }
                        break;

                    case OPge:
                        genregs(cdb,0xD1,4,reg);                    // SHL reg,1
                        code_orrex(cdb.last(),rex);
                        code_orflag(cdb.last(), CFpsw);
                        genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                        code_orrex(cdb.last(),rex);
                        if (I64)
                        {
                            cdb.gen2(0xFF,modregrmx(3,0,reg));      // INC reg
                            code_orrex(cdb.last(), rex);
                        }
                        else
                            cdb.gen1(0x40 + reg);                   // INC reg
                        break;

                    default:
                        assert(0);
                }
                freenode(e2);
                goto ret;
            }

            // Set up the immediate operand; for multi-word operands the MSW goes first
            cs.IFL2 = FLconst;
            if (sz == 16)
                cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vcent.msw;
            else if (sz > REGSIZE)
                cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);
            else
                cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vllong;

            // The cmp immediate relies on sign extension of the 32 bit immediate value
            if (I64 && sz >= REGSIZE && cs.IEV2.Vsize_t != cast(int)cs.IEV2.Vint)
                goto L2;
          L4:
            cs.Iop = 0x81 ^ isbyte;

            /* if ((e1 is data or a '*' reference) and it's not a
             * common subexpression
             */

            if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
                 e1.Eoper == OPind) &&
                !evalinregister(e1))
            {
                // Compare directly against e1 in memory
                getlvalue(cdb,&cs,e1,RMload);
                freenode(e1);
                if (evalinregister(e2))
                {
                    retregs = idxregm(&cs);
                    if ((cs.Iflags & CFSEG) == CFes)
                        retregs |= mES;                 // take no chances
                    rretregs = allregs & ~retregs;
                    if (isbyte)
                        rretregs &= BYTEREGS;
                    scodelem(cdb,e2,&rretregs,retregs,true);
                    cs.Iop = 0x39 ^ isbyte ^ reverse;
                    if (sz > REGSIZE)
                    {
                        rreg = findregmsw(rretregs);
                        cs.Irm |= modregrm(0,rreg,0);
                        getlvalue_msw(&cs);
                        cdb.gen(&cs);                   // CMP EA+2,rreg
                        if (I32 && sz == 6)
                            cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
                        if (I64 && isbyte && rreg >= 4)
                            cdb.last().Irex |= REX;
                        genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop
                        rreg = findreglsw(rretregs);
                        NEWREG(cs.Irm,rreg);
                        getlvalue_lsw(&cs);
                    }
                    else
                    {
                        rreg = findreg(rretregs);
                        code_newreg(&cs, rreg);
                        if (I64 && isbyte && rreg >= 4)
                            cs.Irex |= REX;
                    }
                }
                else
                {
                    cs.Irm |= modregrm(0,7,0);          // /7 selects CMP for opcode 0x81
                    if (sz > REGSIZE)
                    {
                        if (sz == 6)
                            assert(0);
                        if (e2.Eoper == OPrelconst)
                        {   cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg;
                            cs.IEV2.Voffset = 0;
                        }
                        getlvalue_msw(&cs);
                        cdb.gen(&cs);                   // CMP EA+2,const
                        if (!I16 && sz == 6)
                            cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
                        genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE nop
                        if (e2.Eoper == OPconst)
                            cs.IEV2.Vint = cast(int)e2.EV.Vllong;
                        else if (e2.Eoper == OPrelconst)
                        {   // Turn off CFseg, on CFoff
                            cs.Iflags ^= CFseg | CFoff;
                            cs.IEV2.Voffset = e2.EV.Voffset;
                        }
                        else
                            assert(0);
                        getlvalue_lsw(&cs);
                    }
                    freenode(e2);
                }
                cdb.gen(&cs);
                break;
            }

            if (evalinregister(e2) && !OTassign(e1.Eoper) &&
                !isregvar(e1,null,null))
            {
                regm_t m;

                m = allregs & ~regcon.mvar;
                if (isbyte)
                    m &= BYTEREGS;
                if (m & (m - 1))        // if more than one free register
                    goto L2;
            }
            // Comparing against 0: let e1's own code leave the flags set
            if ((e1.Eoper == OPstrcmp || (OTassign(e1.Eoper) && sz <= REGSIZE)) &&
                !boolres(e2) && !evalinregister(e1))
            {
                retregs = mPSW;
                scodelem(cdb,e1,&retregs,0,false);
                freenode(e2);
                break;
            }
            if (sz <= REGSIZE && !boolres(e2) && e1.Eoper == OPadd && *pretregs == mPSW)
            {
                retregs |= mPSW;
                scodelem(cdb,e1,&retregs,0,false);
                freenode(e2);
                break;
            }
            scodelem(cdb,e1,&retregs,0,true);           // compute left leaf
            if (sz == 1)
            {
                reg = findreg(retregs & allregs);       // get reg that e1 is in
                cs.Irm = modregrm(3,7,reg & 7);
                if (reg & 8)
                    cs.Irex |= REX_B;
                if (e1.Eoper == OPvar && e1.EV.Voffset == 1 && e1.EV.Vsym.Sfl == FLreg)
                {   assert(reg < 4);
                    cs.Irm |= 4;                        // use upper register half
                }
                if (I64 && reg >= 4)
                    cs.Irex |= REX;                     // address byte registers
            }
            else if (sz <= REGSIZE)
            {   // CMP reg,const
                reg = findreg(retregs & allregs);       // get reg that e1 is in
                rretregs = allregs & ~retregs;
                // Use a register that already holds the constant, if there is one
                if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,&rreg))
                {
                    genregs(cdb,0x3B,reg,rreg);
                    code_orrex(cdb.last(), rex);
                    if (!I16)
                        cdb.last().Iflags |= cs.Iflags & CFopsize;
                    freenode(e2);
                    break;
                }
                cs.Irm = modregrm(3,7,reg & 7);
                if (reg & 8)
                    cs.Irex |= REX_B;
            }
            else if (sz <= 2 * REGSIZE)
            {
                reg = findregmsw(retregs);              // get reg that e1 is in
                cs.Irm = modregrm(3,7,reg);
                cdb.gen(&cs);                           // CMP reg,MSW
                if (I32 && sz == 6)
                    cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
                genjmp(cdb,JNE,FLcode, cast(block *) ce);       // JNE ce

                reg = findreglsw(retregs);
                cs.Irm = modregrm(3,7,reg);
                if (e2.Eoper == OPconst)
                    cs.IEV2.Vint = e2.EV.Vlong;
                else if (e2.Eoper == OPrelconst)
                {   // Turn off CFseg, on CFoff
                    cs.Iflags ^= CFseg | CFoff;
                    cs.IEV2.Voffset = e2.EV.Voffset;
                }
                else
                    assert(0);
            }
            else
                assert(0);
            cdb.gen(&cs);                               // CMP sucreg,LSW
            freenode(e2);
            break;

        case OPind:
            if (e2.Ecount)
                goto L2;
            goto L5;

        case OPvar:
static if (TARGET_OSX)
{
            if (movOnly(e2))
                goto L2;
}
            if ((e1.Eoper == OPvar &&
                 isregvar(e2,&rretregs,&reg) &&
                 sz <= REGSIZE
                ) ||
                (e1.Eoper == OPind &&
                 isregvar(e2,&rretregs,&reg) &&
                 !evalinregister(e1) &&
                 sz <= REGSIZE
                )
               )
            {
                // CMP EA,e2
                getlvalue(cdb,&cs,e1,RMload);
                freenode(e1);
                cs.Iop = 0x39 ^ isbyte ^ reverse;
                code_newreg(&cs,reg);
                if (I64 && isbyte && reg >= 4)
                    cs.Irex |= REX;                     // address byte registers
                cdb.gen(&cs);
                freenode(e2);
                break;
            }
          L5:
            scodelem(cdb,e1,&retregs,0,true);           // compute left leaf
            if (sz <= REGSIZE)                          // CMP reg,EA
            {
                reg = findreg(retregs & allregs);       // get reg that e1 is in
                uint opsize = cs.Iflags & CFopsize;     // saved here and re-applied after loadea()
                loadea(cdb,e2,&cs,0x3B ^ isbyte ^ reverse,reg,0,RMload | retregs,0);
                code_orflag(cdb.last(),opsize);
            }
            else if (sz <= 2 * REGSIZE)
            {
                reg = findregmsw(retregs);              // get reg that e1 is in
                // CMP reg,EA
                loadea(cdb,e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0);
                if (I32 && sz == 6)
                    cdb.last().Iflags |= CFopsize;      // seg is only 16 bits
                genjmp(cdb,JNE,FLcode, cast(block *) ce);       // JNE ce
                reg = findreglsw(retregs);
                if (e2.Eoper == OPind)
                {
                    NEWREG(cs.Irm,reg);
                    getlvalue_lsw(&cs);
                    cdb.gen(&cs);
                }
                else
                    loadea(cdb,e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0);
            }
            else
                assert(0);
            freenode(e2);
            break;
    }
    cdb.append(ce);

L3:
    // The flags now hold the outcome; materialize a value in a register if one is wanted
    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        if (config.target_cpu >= TARGET_80386 && !flag && !(jop & 0xFF00))
        {
            regm_t resregs = retregs;
            if (!I64)
            {
                resregs &= BYTEREGS;
                if (!resregs)
                    resregs = BYTEREGS;
            }
            allocreg(cdb,&resregs,&reg,TYint);
            cdb.gen2(0x0F90 + (jop & 0x0F),modregrmx(3,0,reg)); // SETcc reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(),REX);
            if (tysize(e.Ety) > 1)
            {
                genregs(cdb,0x0FB6,reg,reg);            // MOVZX reg,reg
                if (I64 && sz == 8)
                    code_orrex(cdb.last(),REX_W);
                if (I64 && reg >= 4)
                    code_orrex(cdb.last(),REX);
            }
            *pretregs &= ~mPSW;
            fixresult(cdb,e,resregs,pretregs);
        }
        else
        {
            code *nop = null;
            regm_t save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0 &&
                (jop == JC || jop == JNC))
            {
                // The outcome is in the carry flag: SBB gives 0 or -1,
                // which INC or NEG then turns into 0 or 1
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);              // SBB reg,reg
                if (rex)
                    code_orrex(cdb.last(), rex);
                if (flag)
                { }                                     // cdcond() will handle it
                else if (jop == JNC)
                {
                    if (I64)
                    {
                        cdb.gen2(0xFF,modregrmx(3,0,reg));      // INC reg
                        code_orrex(cdb.last(), rex);
                    }
                    else
                        cdb.gen1(0x40 + reg);                   // INC reg
                }
                else
                {
                    cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
                    code_orrex(cdb.last(), rex);
                }
            }
            else if (I64 && sz == 8)
            {
                assert(!flag);
                movregconst(cdb,reg,1,64|8);            // MOV reg,1
                nop = gennop(nop);
                genjmp(cdb,jop,FLcode,cast(block *) nop);       // Jtrue nop
                                                        // MOV reg,0
                movregconst(cdb,reg,0,(*pretregs & mPSW) ? 64|8 : 64);
                regcon.immed.mval &= ~mask(reg);
            }
            else
            {
                assert(!flag);
                movregconst(cdb,reg,1,8);               // MOV reg,1
                nop = gennop(nop);
                genjmp(cdb,jop,FLcode,cast(block *) nop);       // Jtrue nop
                                                        // MOV reg,0
                movregconst(cdb,reg,0,(*pretregs & mPSW) ? 8 : 0);
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(nop);
        }
    }
ret:
    { }
}


/**********************************
 * Generate code for signed compare of longs.
2566 * Input: 2567 * targ block* or code* 2568 */ 2569 2570 void longcmp(ref CodeBuilder cdb,elem *e,bool jcond,uint fltarg,code *targ) 2571 { 2572 // <= > < >= 2573 static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ]; 2574 static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ]; 2575 2576 //printf("longcmp(e = %p)\n", e); 2577 elem *e1 = e.EV.E1; 2578 elem *e2 = e.EV.E2; 2579 OPER op = e.Eoper; 2580 2581 // See if we should swap operands 2582 if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2)) 2583 { 2584 e1 = e.EV.E2; 2585 e2 = e.EV.E1; 2586 op = swaprel(op); 2587 } 2588 2589 code cs; 2590 cs.Iflags = 0; 2591 cs.Irex = 0; 2592 2593 code *ce = gennop(null); 2594 regm_t retregs = ALLREGS; 2595 regm_t rretregs; 2596 reg_t reg,rreg; 2597 2598 uint jop = jopmsw[op - OPle]; 2599 if (!(jcond & 1)) jop ^= (JL ^ JG); // toggle jump condition 2600 CodeBuilder cdbjmp; 2601 cdbjmp.ctor(); 2602 genjmp(cdbjmp,jop,fltarg, cast(block *) targ); // Jx targ 2603 genjmp(cdbjmp,jop ^ (JL ^ JG),FLcode, cast(block *) ce); // Jy nop 2604 2605 switch (e2.Eoper) 2606 { 2607 default: 2608 L2: 2609 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 2610 rretregs = ALLREGS & ~retregs; 2611 scodelem(cdb,e2,&rretregs,retregs,true); // get right leaf 2612 cse_flush(cdb,1); 2613 // Compare MSW, if they're equal then compare the LSW 2614 reg = findregmsw(retregs); 2615 rreg = findregmsw(rretregs); 2616 genregs(cdb,0x3B,reg,rreg); // CMP reg,rreg 2617 cdb.append(cdbjmp); 2618 2619 reg = findreglsw(retregs); 2620 rreg = findreglsw(rretregs); 2621 genregs(cdb,0x3B,reg,rreg); // CMP reg,rreg 2622 break; 2623 2624 case OPconst: 2625 cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong); // MSW first 2626 cs.IFL2 = FLconst; 2627 cs.Iop = 0x81; 2628 2629 /* if ((e1 is data or a '*' reference) and it's not a 2630 * common subexpression 2631 */ 2632 2633 if ((e1.Eoper == OPvar && datafl[el_fl(e1)] || 2634 e1.Eoper == OPind) && 2635 !evalinregister(e1)) 2636 { 2637 getlvalue(cdb,&cs,e1,0); 2638 
freenode(e1); 2639 if (evalinregister(e2)) 2640 { 2641 retregs = idxregm(&cs); 2642 if ((cs.Iflags & CFSEG) == CFes) 2643 retregs |= mES; // take no chances 2644 rretregs = ALLREGS & ~retregs; 2645 scodelem(cdb,e2,&rretregs,retregs,true); 2646 cse_flush(cdb,1); 2647 rreg = findregmsw(rretregs); 2648 cs.Iop = 0x39; 2649 cs.Irm |= modregrm(0,rreg,0); 2650 getlvalue_msw(&cs); 2651 cdb.gen(&cs); // CMP EA+2,rreg 2652 cdb.append(cdbjmp); 2653 rreg = findreglsw(rretregs); 2654 NEWREG(cs.Irm,rreg); 2655 } 2656 else 2657 { 2658 cse_flush(cdb,1); 2659 cs.Irm |= modregrm(0,7,0); 2660 getlvalue_msw(&cs); 2661 cdb.gen(&cs); // CMP EA+2,const 2662 cdb.append(cdbjmp); 2663 cs.IEV2.Vint = e2.EV.Vlong; 2664 freenode(e2); 2665 } 2666 getlvalue_lsw(&cs); 2667 cdb.gen(&cs); // CMP EA,rreg/const 2668 break; 2669 } 2670 if (evalinregister(e2)) 2671 goto L2; 2672 2673 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 2674 cse_flush(cdb,1); 2675 reg = findregmsw(retregs); // get reg that e1 is in 2676 cs.Irm = modregrm(3,7,reg); 2677 2678 cdb.gen(&cs); // CMP reg,MSW 2679 cdb.append(cdbjmp); 2680 reg = findreglsw(retregs); 2681 cs.Irm = modregrm(3,7,reg); 2682 cs.IEV2.Vint = e2.EV.Vlong; 2683 cdb.gen(&cs); // CMP sucreg,LSW 2684 freenode(e2); 2685 break; 2686 2687 case OPvar: 2688 if (!e1.Ecount && e1.Eoper == OPs32_64) 2689 { 2690 reg_t msreg; 2691 2692 retregs = allregs; 2693 scodelem(cdb,e1.EV.E1,&retregs,0,true); 2694 freenode(e1); 2695 reg = findreg(retregs); 2696 retregs = allregs & ~retregs; 2697 allocreg(cdb,&retregs,&msreg,TYint); 2698 genmovreg(cdb,msreg,reg); // MOV msreg,reg 2699 cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1); // SAR msreg,31 2700 cse_flush(cdb,1); 2701 loadea(cdb,e2,&cs,0x3B,msreg,REGSIZE,mask(reg),0); 2702 cdb.append(cdbjmp); 2703 loadea(cdb,e2,&cs,0x3B,reg,0,mask(reg),0); 2704 freenode(e2); 2705 } 2706 else 2707 { 2708 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 2709 cse_flush(cdb,1); 2710 reg = findregmsw(retregs); // get reg that e1 
is in 2711 loadea(cdb,e2,&cs,0x3B,reg,REGSIZE,retregs,0); 2712 cdb.append(cdbjmp); 2713 reg = findreglsw(retregs); 2714 loadea(cdb,e2,&cs,0x3B,reg,0,retregs,0); 2715 freenode(e2); 2716 } 2717 break; 2718 } 2719 2720 jop = joplsw[op - OPle]; 2721 if (!(jcond & 1)) jop ^= 1; // toggle jump condition 2722 genjmp(cdb,jop,fltarg,cast(block *) targ); // Jcond targ 2723 2724 cdb.append(ce); 2725 freenode(e); 2726 } 2727 2728 /***************************** 2729 * Do conversions. 2730 * Depends on OPd_s32 and CLIB.dbllng being in sequence. 2731 */ 2732 2733 void cdcnvt(ref CodeBuilder cdb,elem *e, regm_t *pretregs) 2734 { 2735 //printf("cdcnvt: %p *pretregs = %s\n", e, regm_str(*pretregs)); 2736 //elem_print(e); 2737 2738 static immutable ubyte[2][16] clib = 2739 [ 2740 [ OPd_s32, CLIB.dbllng ], 2741 [ OPs32_d, CLIB.lngdbl ], 2742 [ OPd_s16, CLIB.dblint ], 2743 [ OPs16_d, CLIB.intdbl ], 2744 [ OPd_u16, CLIB.dbluns ], 2745 [ OPu16_d, CLIB.unsdbl ], 2746 [ OPd_u32, CLIB.dblulng ], 2747 [ OPu32_d, CLIB.ulngdbl ], 2748 [ OPd_s64, CLIB.dblllng ], 2749 [ OPs64_d, CLIB.llngdbl ], 2750 [ OPd_u64, CLIB.dblullng ], 2751 [ OPu64_d, CLIB.ullngdbl ], 2752 [ OPd_f, CLIB.dblflt ], 2753 [ OPf_d, CLIB.fltdbl ], 2754 [ OPvp_fp, CLIB.vptrfptr ], 2755 [ OPcvp_fp, CLIB.cvptrfptr] 2756 ]; 2757 2758 if (!*pretregs) 2759 { 2760 codelem(cdb,e.EV.E1,pretregs,false); 2761 return; 2762 } 2763 2764 regm_t retregs; 2765 if (config.inline8087) 2766 { 2767 switch (e.Eoper) 2768 { 2769 case OPld_d: 2770 case OPd_ld: 2771 { 2772 if (tycomplex(e.EV.E1.Ety)) 2773 { 2774 Lcomplex: 2775 regm_t retregsx = mST01 | (*pretregs & mPSW); 2776 codelem(cdb,e.EV.E1, &retregsx, false); 2777 fixresult_complex87(cdb, e, retregsx, pretregs); 2778 return; 2779 } 2780 regm_t retregsx = mST0 | (*pretregs & mPSW); 2781 codelem(cdb,e.EV.E1, &retregsx, false); 2782 fixresult87(cdb, e, retregsx, pretregs); 2783 return; 2784 } 2785 2786 case OPf_d: 2787 case OPd_f: 2788 if (tycomplex(e.EV.E1.Ety)) 2789 goto Lcomplex; 2790 if 
(config.fpxmmregs && *pretregs & XMMREGS) 2791 { 2792 xmmcnvt(cdb, e, pretregs); 2793 return; 2794 } 2795 2796 /* if won't do us much good to transfer back and */ 2797 /* forth between 8088 registers and 8087 registers */ 2798 if (OTcall(e.EV.E1.Eoper) && !(*pretregs & allregs)) 2799 { 2800 retregs = regmask(e.EV.E1.Ety, e.EV.E1.EV.E1.Ety); 2801 if (retregs & (mXMM1 | mXMM0 |mST01 | mST0)) // if return in ST0 2802 { 2803 codelem(cdb,e.EV.E1,pretregs,false); 2804 if (*pretregs & mST0) 2805 note87(e, 0, 0); 2806 return; 2807 } 2808 else 2809 break; 2810 } 2811 goto Lload87; 2812 2813 case OPs64_d: 2814 if (!I64) 2815 goto Lload87; 2816 goto case OPs32_d; 2817 2818 case OPs32_d: 2819 if (config.fpxmmregs && *pretregs & XMMREGS) 2820 { 2821 xmmcnvt(cdb, e, pretregs); 2822 return; 2823 } 2824 goto Lload87; 2825 2826 case OPs16_d: 2827 case OPu16_d: 2828 Lload87: 2829 load87(cdb,e,0,pretregs,null,-1); 2830 return; 2831 2832 case OPu32_d: 2833 if (I64 && config.fpxmmregs && *pretregs & XMMREGS) 2834 { 2835 xmmcnvt(cdb,e,pretregs); 2836 return; 2837 } 2838 else if (!I16) 2839 { 2840 regm_t retregsx = ALLREGS; 2841 codelem(cdb,e.EV.E1, &retregsx, false); 2842 reg_t reg = findreg(retregsx); 2843 cdb.genfltreg(0x89, reg, 0); 2844 regwithvalue(cdb,ALLREGS,0,®,0); 2845 cdb.genfltreg(0x89, reg, 4); 2846 2847 push87(cdb); 2848 cdb.genfltreg(0xDF,5,0); // FILD m64int 2849 2850 regm_t retregsy = mST0 /*| (*pretregs & mPSW)*/; 2851 fixresult87(cdb, e, retregsy, pretregs); 2852 return; 2853 } 2854 break; 2855 2856 case OPd_s64: 2857 if (!I64) 2858 goto Lcnvt87; 2859 goto case OPd_s32; 2860 2861 case OPd_s32: 2862 if (config.fpxmmregs) 2863 { 2864 xmmcnvt(cdb,e,pretregs); 2865 return; 2866 } 2867 goto Lcnvt87; 2868 2869 case OPd_s16: 2870 case OPd_u16: 2871 Lcnvt87: 2872 cnvt87(cdb,e,pretregs); 2873 return; 2874 2875 case OPd_u32: // use subroutine, not 8087 2876 if (I64 && config.fpxmmregs) 2877 { 2878 xmmcnvt(cdb,e,pretregs); 2879 return; 2880 } 2881 if (I32 || I64) 2882 { 2883 
cdd_u32(cdb,e,pretregs); 2884 return; 2885 } 2886 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || 2887 TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 2888 { 2889 retregs = mST0; 2890 } 2891 else 2892 { 2893 retregs = DOUBLEREGS; 2894 } 2895 goto L1; 2896 2897 case OPd_u64: 2898 if (I32 || I64) 2899 { 2900 cdd_u64(cdb,e,pretregs); 2901 return; 2902 } 2903 retregs = DOUBLEREGS; 2904 goto L1; 2905 2906 case OPu64_d: 2907 if (*pretregs & mST0) 2908 { 2909 regm_t retregsx = I64 ? mAX : mAX|mDX; 2910 codelem(cdb,e.EV.E1,&retregsx,false); 2911 callclib(cdb,e,CLIB.u64_ldbl,pretregs,0); 2912 return; 2913 } 2914 break; 2915 2916 case OPld_u64: 2917 { 2918 if (I32 || I64) 2919 { 2920 cdd_u64(cdb,e,pretregs); 2921 return; 2922 } 2923 regm_t retregsx = mST0; 2924 codelem(cdb,e.EV.E1,&retregsx,false); 2925 callclib(cdb,e,CLIB.ld_u64,pretregs,0); 2926 return; 2927 } 2928 2929 default: 2930 break; 2931 } 2932 } 2933 retregs = regmask(e.EV.E1.Ety, TYnfunc); 2934 L1: 2935 codelem(cdb,e.EV.E1,&retregs,false); 2936 for (int i = 0; 1; i++) 2937 { 2938 assert(i < clib.length); 2939 if (clib[i][0] == e.Eoper) 2940 { 2941 callclib(cdb,e,clib[i][1],pretregs,0); 2942 break; 2943 } 2944 } 2945 } 2946 2947 2948 /*************************** 2949 * Convert short to long. 2950 * For OPs16_32, OPu16_32, OPnp_fp, OPu32_64, OPs32_64, 2951 * OPu64_128, OPs64_128 2952 */ 2953 2954 void cdshtlng(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 2955 { 2956 reg_t reg; 2957 regm_t retregs; 2958 2959 //printf("cdshtlng(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 2960 int e1comsub = e.EV.E1.Ecount; 2961 ubyte op = e.Eoper; 2962 if ((*pretregs & (ALLREGS | mBP)) == 0) // if don't need result in regs 2963 { 2964 codelem(cdb,e.EV.E1,pretregs,false); // then conversion isn't necessary 2965 return; 2966 } 2967 else if ( 2968 op == OPnp_fp || 2969 (I16 && op == OPu16_32) || 2970 (I32 && op == OPu32_64) 2971 ) 2972 { 2973 /* Result goes into a register pair. 
2974 * Zero extend by putting a zero into most significant reg. 2975 */ 2976 2977 regm_t retregsx = *pretregs & mLSW; 2978 assert(retregsx); 2979 tym_t tym1 = tybasic(e.EV.E1.Ety); 2980 codelem(cdb,e.EV.E1,&retregsx,false); 2981 2982 regm_t regm = *pretregs & (mMSW & ALLREGS); 2983 if (regm == 0) // *pretregs could be mES 2984 regm = mMSW & ALLREGS; 2985 allocreg(cdb,®m,®,TYint); 2986 if (e1comsub) 2987 getregs(cdb,retregsx); 2988 if (op == OPnp_fp) 2989 { 2990 int segreg; 2991 2992 // BUG: what about pointers to functions? 2993 switch (tym1) 2994 { 2995 case TYimmutPtr: 2996 case TYnptr: segreg = SEG_DS; break; 2997 case TYcptr: segreg = SEG_CS; break; 2998 case TYsptr: segreg = SEG_SS; break; 2999 default: assert(0); 3000 } 3001 cdb.gen2(0x8C,modregrm(3,segreg,reg)); // MOV reg,segreg 3002 } 3003 else 3004 movregconst(cdb,reg,0,0); // 0 extend 3005 3006 fixresult(cdb,e,retregsx | regm,pretregs); 3007 return; 3008 } 3009 else if (I64 && op == OPu32_64) 3010 { 3011 elem *e1 = e.EV.E1; 3012 retregs = *pretregs; 3013 if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount)) 3014 { 3015 code cs; 3016 3017 allocreg(cdb,&retregs,®,TYint); 3018 loadea(cdb,e1,&cs,0x8B,reg,0,retregs,retregs); // MOV Ereg,EA 3019 freenode(e1); 3020 } 3021 else 3022 { 3023 *pretregs &= ~mPSW; // flags are set by eval of e1 3024 codelem(cdb,e1,&retregs,false); 3025 /* Determine if high 32 bits are already 0 3026 */ 3027 if (e1.Eoper == OPu16_32 && !e1.Ecount) 3028 { 3029 } 3030 else 3031 { 3032 // Zero high 32 bits 3033 getregs(cdb,retregs); 3034 reg = findreg(retregs); 3035 // Don't use x89 because that will get optimized away 3036 genregs(cdb,0x8B,reg,reg); // MOV Ereg,Ereg 3037 } 3038 } 3039 fixresult(cdb,e,retregs,pretregs); 3040 return; 3041 } 3042 else if (I64 && op == OPs32_64 && OTrel(e.EV.E1.Eoper) && !e.EV.E1.Ecount) 3043 { 3044 /* Due to how e1 is calculated, the high 32 bits of the register 3045 * are already 0. 
3046 */ 3047 retregs = *pretregs; 3048 codelem(cdb,e.EV.E1,&retregs,false); 3049 fixresult(cdb,e,retregs,pretregs); 3050 return; 3051 } 3052 else if (!I16 && (op == OPs16_32 || op == OPu16_32) || 3053 I64 && op == OPs32_64) 3054 { 3055 elem *e11; 3056 elem *e1 = e.EV.E1; 3057 3058 if (e1.Eoper == OPu8_16 && !e1.Ecount && 3059 ((e11 = e1.EV.E1).Eoper == OPvar || (e11.Eoper == OPind && !e11.Ecount)) 3060 ) 3061 { 3062 code cs; 3063 3064 retregs = *pretregs & BYTEREGS; 3065 if (!retregs) 3066 retregs = BYTEREGS; 3067 allocreg(cdb,&retregs,®,TYint); 3068 movregconst(cdb,reg,0,0); // XOR reg,reg 3069 loadea(cdb,e11,&cs,0x8A,reg,0,retregs,retregs); // MOV regL,EA 3070 freenode(e11); 3071 freenode(e1); 3072 } 3073 else if (e1.Eoper == OPvar || 3074 (e1.Eoper == OPind && !e1.Ecount)) 3075 { 3076 code cs = void; 3077 3078 if (I32 && op == OPu16_32 && config.flags4 & CFG4speed) 3079 goto L2; 3080 retregs = *pretregs; 3081 allocreg(cdb,&retregs,®,TYint); 3082 const opcode = (op == OPu16_32) ? 0x0FB7 : 0x0FBF; // MOVZX/MOVSX reg,EA 3083 if (op == OPs32_64) 3084 { 3085 assert(I64); 3086 // MOVSXD reg,e1 3087 loadea(cdb,e1,&cs,0x63,reg,0,0,retregs); 3088 code_orrex(cdb.last(), REX_W); 3089 } 3090 else 3091 loadea(cdb,e1,&cs,opcode,reg,0,0,retregs); 3092 freenode(e1); 3093 } 3094 else 3095 { 3096 L2: 3097 retregs = *pretregs; 3098 if (op == OPs32_64) 3099 retregs = mAX | (*pretregs & mPSW); 3100 *pretregs &= ~mPSW; // flags are already set 3101 CodeBuilder cdbx; 3102 cdbx.ctor(); 3103 codelem(cdbx,e1,&retregs,false); 3104 code *cx = cdbx.finish(); 3105 cdb.append(cdbx); 3106 getregs(cdb,retregs); 3107 if (op == OPu16_32 && cx) 3108 { 3109 cx = code_last(cx); 3110 if (cx.Iop == 0x81 && (cx.Irm & modregrm(3,7,0)) == modregrm(3,4,0) && 3111 mask(cx.Irm & 7) == retregs) 3112 { 3113 // Convert AND of a word to AND of a dword, zeroing upper word 3114 if (cx.Irex & REX_B) 3115 retregs = mask(8 | (cx.Irm & 7)); 3116 cx.Iflags &= ~CFopsize; 3117 cx.IEV2.Vint &= 0xFFFF; 3118 goto L1; 3119 
} 3120 } 3121 if (op == OPs16_32 && retregs == mAX) 3122 cdb.gen1(0x98); // CWDE 3123 else if (op == OPs32_64 && retregs == mAX) 3124 { 3125 cdb.gen1(0x98); // CDQE 3126 code_orrex(cdb.last(), REX_W); 3127 } 3128 else 3129 { 3130 reg = findreg(retregs); 3131 if (config.flags4 & CFG4speed && op == OPu16_32) 3132 { // AND reg,0xFFFF 3133 cdb.genc2(0x81,modregrmx(3,4,reg),0xFFFFu); 3134 } 3135 else 3136 { 3137 uint iop = (op == OPu16_32) ? 0x0FB7 : 0x0FBF; // MOVZX/MOVSX reg,reg 3138 genregs(cdb,iop,reg,reg); 3139 } 3140 } 3141 L1: 3142 if (e1comsub) 3143 getregs(cdb,retregs); 3144 } 3145 fixresult(cdb,e,retregs,pretregs); 3146 return; 3147 } 3148 else if (*pretregs & mPSW || config.target_cpu < TARGET_80286) 3149 { 3150 // OPs16_32, OPs32_64 3151 // CWD doesn't affect flags, so we can depend on the integer 3152 // math to provide the flags. 3153 retregs = mAX | mPSW; // want integer result in AX 3154 *pretregs &= ~mPSW; // flags are already set 3155 codelem(cdb,e.EV.E1,&retregs,false); 3156 getregs(cdb,mDX); // sign extend into DX 3157 cdb.gen1(0x99); // CWD/CDQ 3158 if (e1comsub) 3159 getregs(cdb,retregs); 3160 fixresult(cdb,e,mDX | retregs,pretregs); 3161 return; 3162 } 3163 else 3164 { 3165 // OPs16_32, OPs32_64 3166 uint msreg,lsreg; 3167 3168 retregs = *pretregs & mLSW; 3169 assert(retregs); 3170 codelem(cdb,e.EV.E1,&retregs,false); 3171 retregs |= *pretregs & mMSW; 3172 allocreg(cdb,&retregs,®,e.Ety); 3173 msreg = findregmsw(retregs); 3174 lsreg = findreglsw(retregs); 3175 genmovreg(cdb,msreg,lsreg); // MOV msreg,lsreg 3176 assert(config.target_cpu >= TARGET_80286); // 8088 can't handle SAR reg,imm8 3177 cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1); // SAR msreg,31 3178 fixresult(cdb,e,retregs,pretregs); 3179 return; 3180 } 3181 } 3182 3183 3184 /*************************** 3185 * Convert byte to int. 3186 * For OPu8_16 and OPs8_16. 
3187 */ 3188 3189 void cdbyteint(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3190 { 3191 regm_t retregs; 3192 char size; 3193 3194 if ((*pretregs & (ALLREGS | mBP)) == 0) // if don't need result in regs 3195 { 3196 codelem(cdb,e.EV.E1,pretregs,false); // then conversion isn't necessary 3197 return; 3198 } 3199 3200 //printf("cdbyteint(e = %p, *pretregs = %s\n", e, regm_str(*pretregs)); 3201 char op = e.Eoper; 3202 elem *e1 = e.EV.E1; 3203 if (e1.Eoper == OPcomma) 3204 docommas(cdb,&e1); 3205 if (!I16) 3206 { 3207 if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount)) 3208 { 3209 code cs; 3210 3211 regm_t retregsx = *pretregs; 3212 reg_t reg; 3213 allocreg(cdb,&retregsx,®,TYint); 3214 if (config.flags4 & CFG4speed && 3215 op == OPu8_16 && mask(reg) & BYTEREGS && 3216 config.target_cpu < TARGET_PentiumPro) 3217 { 3218 movregconst(cdb,reg,0,0); // XOR reg,reg 3219 loadea(cdb,e1,&cs,0x8A,reg,0,retregsx,retregsx); // MOV regL,EA 3220 } 3221 else 3222 { 3223 const opcode = (op == OPu8_16) ? 0x0FB6 : 0x0FBE; // MOVZX/MOVSX reg,EA 3224 loadea(cdb,e1,&cs,opcode,reg,0,0,retregsx); 3225 } 3226 freenode(e1); 3227 fixresult(cdb,e,retregsx,pretregs); 3228 return; 3229 } 3230 size = tysize(e.Ety); 3231 retregs = *pretregs & BYTEREGS; 3232 if (retregs == 0) 3233 retregs = BYTEREGS; 3234 retregs |= *pretregs & mPSW; 3235 *pretregs &= ~mPSW; 3236 } 3237 else 3238 { 3239 if (op == OPu8_16) // if uint conversion 3240 { 3241 retregs = *pretregs & BYTEREGS; 3242 if (retregs == 0) 3243 retregs = BYTEREGS; 3244 } 3245 else 3246 { 3247 // CBW doesn't affect flags, so we can depend on the integer 3248 // math to provide the flags. 
3249 retregs = mAX | (*pretregs & mPSW); // want integer result in AX 3250 } 3251 } 3252 3253 CodeBuilder cdb1; 3254 cdb1.ctor(); 3255 codelem(cdb1,e1,&retregs,false); 3256 code *c1 = cdb1.finish(); 3257 cdb.append(cdb1); 3258 reg_t reg = findreg(retregs); 3259 code *c; 3260 if (!c1) 3261 goto L1; 3262 3263 // If previous instruction is an AND bytereg,value 3264 c = cdb.last(); 3265 if (c.Iop == 0x80 && c.Irm == modregrm(3,4,reg & 7) && 3266 (op == OPu8_16 || (c.IEV2.Vuns & 0x80) == 0)) 3267 { 3268 if (*pretregs & mPSW) 3269 c.Iflags |= CFpsw; 3270 c.Iop |= 1; // convert to word operation 3271 c.IEV2.Vuns &= 0xFF; // dump any high order bits 3272 *pretregs &= ~mPSW; // flags already set 3273 } 3274 else 3275 { 3276 L1: 3277 if (!I16) 3278 { 3279 if (op == OPs8_16 && reg == AX && size == 2) 3280 { 3281 cdb.gen1(0x98); // CBW 3282 cdb.last().Iflags |= CFopsize; // don't do a CWDE 3283 } 3284 else 3285 { 3286 // We could do better by not forcing the src and dst 3287 // registers to be the same. 3288 3289 if (config.flags4 & CFG4speed && op == OPu8_16) 3290 { // AND reg,0xFF 3291 cdb.genc2(0x81,modregrmx(3,4,reg),0xFF); 3292 } 3293 else 3294 { 3295 uint iop = (op == OPu8_16) ? 0x0FB6 : 0x0FBE; // MOVZX/MOVSX reg,reg 3296 genregs(cdb,iop,reg,reg); 3297 if (I64 && reg >= 4) 3298 code_orrex(cdb.last(), REX); 3299 } 3300 } 3301 } 3302 else 3303 { 3304 if (op == OPu8_16) 3305 genregs(cdb,0x30,reg+4,reg+4); // XOR regH,regH 3306 else 3307 { 3308 cdb.gen1(0x98); // CBW 3309 *pretregs &= ~mPSW; // flags already set 3310 } 3311 } 3312 } 3313 getregs(cdb,retregs); 3314 fixresult(cdb,e,retregs,pretregs); 3315 } 3316 3317 3318 /*************************** 3319 * Convert long to short (OP32_16). 3320 * Get offset of far pointer (OPoffset). 3321 * Convert int to byte (OP16_8). 3322 * Convert long long to long (OP64_32). 
 * OP128_64
 *
 * The operand is evaluated into registers and the result is then taken
 * from a subset of those registers; no conversion instruction is
 * emitted by this function itself.
 * Params:
 *      cdb      = code sink
 *      e        = conversion elem, operand in e.EV.E1
 *      pretregs = register mask requested for the result (0 = none)
 */

void cdlngsht(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Debug builds verify this is only reached for the listed operators
    debug
    {
        switch (e.Eoper)
        {
            case OP32_16:
            case OPoffset:
            case OP16_8:
            case OP64_32:
            case OP128_64:
                break;

            default:
                assert(0);
        }
    }

    regm_t retregs;
    if (e.Eoper == OP16_8)
    {
        retregs = *pretregs ? BYTEREGS : 0;
        codelem(cdb,e.EV.E1,&retregs,false);
    }
    else
    {
        if (e.EV.E1.Eoper == OPrelconst)
            offsetinreg(cdb,e.EV.E1,&retregs);
        else
        {
            retregs = *pretregs ? ALLREGS : 0;
            codelem(cdb,e.EV.E1,&retregs,false);
            bool isOff = e.Eoper == OPoffset;
            if (I16 ||
                I32 && (isOff || e.Eoper == OP64_32) ||
                I64 && (isOff || e.Eoper == OP128_64))
                retregs &= mLSW;                // want LSW only
        }
    }

    /* We "destroy" a reg by assigning it the result of a new e, even
     * though the values are the same. Weakness of our CSE strategy that
     * a register can only hold the contents of one elem at a time.
     */
    if (e.Ecount)
        getregs(cdb,retregs);
    else
        useregs(retregs);

    // Debug builds print the offending state before the assert below fires
    debug
    if (!(!*pretregs || retregs))
    {
        WROP(e.Eoper),
        printf(" *pretregs = %s, retregs = %s, e = %p\n",regm_str(*pretregs),regm_str(retregs),e);
    }

    assert(!*pretregs || retregs);
    fixresult(cdb,e,retregs,pretregs);  // lsw only
}

/**********************************************
 * Get top 32 bits of 64 bit value (I32)
 * or top 16 bits of 32 bit value (I16)
 * or top 64 bits of 128 bit value (I64).
 * OPmsw
 *
 * Params:
 *      cdb      = code sink
 *      e        = OPmsw elem, operand in e.EV.E1
 *      pretregs = register mask requested for the result (0 = none)
 */

void cdmsw(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPmsw);

    regm_t retregs = *pretregs ? ALLREGS : 0;
    codelem(cdb,e.EV.E1,&retregs,false);
    retregs &= mMSW;                    // want MSW only

    /* We "destroy" a reg by assigning it the result of a new e, even
     * though the values are the same. Weakness of our CSE strategy that
     * a register can only hold the contents of one elem at a time.
     */
    if (e.Ecount)
        getregs(cdb,retregs);
    else
        useregs(retregs);

    // Debug builds print the offending state before the assert below fires
    debug
    if (!(!*pretregs || retregs))
    {   WROP(e.Eoper);
        printf(" *pretregs = %s, retregs = %s\n",regm_str(*pretregs),regm_str(retregs));
        elem_print(e);
    }

    assert(!*pretregs || retregs);
    fixresult(cdb,e,retregs,pretregs);  // msw only
}



/******************************
 * Handle operators OPinp and OPoutp.
 *
 * The port number comes from e.EV.E1. A constant port <= 255 is encoded
 * as an immediate (subject to the register-allocation test below);
 * otherwise it is evaluated into DX. For OPoutp the value to write is
 * e.EV.E2, evaluated into AX. The result is reported in AX.
 * Params:
 *      cdb      = code sink
 *      e        = OPinp or OPoutp elem
 *      pretregs = register mask requested for the result
 */

void cdport(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdport\n");
    ubyte op = 0xE4;            // root of all IN/OUT opcodes
    elem *e1 = e.EV.E1;

    // See if we can use immediate mode of IN/OUT opcodes
    ubyte port;
    if (e1.Eoper == OPconst && e1.EV.Vuns <= 255 &&
        (!evalinregister(e1) || regcon.mvar & mDX))
    {
        port = cast(ubyte)e1.EV.Vuns;
        freenode(e1);
    }
    else
    {
        regm_t retregs = mDX;           // port number is always DX
        codelem(cdb,e1,&retregs,false);
        op |= 0x08;                     // DX version of opcode
        port = 0;                       // not logically needed, but
                                        // quiets "uninitialized var" complaints
    }

    uint sz;                            // operand size in bytes
    if (e.Eoper == OPoutp)
    {
        sz = tysize(e.EV.E2.Ety);
        regm_t retregs = mAX;           // byte/word to output is in AL/AX
        // keep DX intact while evaluating the value if it holds the port
        scodelem(cdb,e.EV.E2,&retregs,((op & 0x08) ? mDX : 0),true);
        op |= 0x02;                     // OUT opcode
    }
    else // OPinp
    {
        getregs(cdb,mAX);
        sz = tysize(e.Ety);
    }

    if (sz != 1)
        op |= 1;                        // word operation
    cdb.genc2(op,0,port);               // IN/OUT AL/AX,DX/port
    if (op & 1 && sz != REGSIZE)        // if need size override
        cdb.last().Iflags |= CFopsize;
    regm_t retregs = mAX;
    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * Generate code for an asm elem.
3475 */ 3476 3477 void cdasm(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 3478 { 3479 // Assume only regs normally destroyed by a function are destroyed 3480 getregs(cdb,(ALLREGS | mES) & ~fregsaved); 3481 cdb.genasm(cast(char *)e.EV.Vstring, cast(uint)e.EV.Vstrlen); 3482 fixresult(cdb,e,(I16 ? mDX | mAX : mAX),pretregs); 3483 } 3484 3485 /************************ 3486 * Generate code for OPnp_f16p and OPf16p_np. 3487 */ 3488 3489 void cdfar16(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 3490 { 3491 code *cnop; 3492 code cs; 3493 3494 assert(I32); 3495 codelem(cdb,e.EV.E1,pretregs,false); 3496 reg_t reg = findreg(*pretregs); 3497 getregs(cdb,*pretregs); // we will destroy the regs 3498 3499 cs.Iop = 0xC1; 3500 cs.Irm = modregrm(3,0,reg); 3501 cs.Iflags = 0; 3502 cs.Irex = 0; 3503 cs.IFL2 = FLconst; 3504 cs.IEV2.Vuns = 16; 3505 3506 cdb.gen(&cs); // ROL ereg,16 3507 cs.Irm |= modregrm(0,1,0); 3508 cdb.gen(&cs); // ROR ereg,16 3509 cs.IEV2.Vuns = 3; 3510 cs.Iflags |= CFopsize; 3511 3512 if (e.Eoper == OPnp_f16p) 3513 { 3514 /* OR ereg,ereg 3515 JE L1 3516 ROR ereg,16 3517 SHL reg,3 3518 MOV rx,SS 3519 AND rx,3 ;mask off CPL bits 3520 OR rl,4 ;run on LDT bit 3521 OR regl,rl 3522 ROL ereg,16 3523 L1: NOP 3524 */ 3525 reg_t rx; 3526 3527 regm_t retregs = BYTEREGS & ~*pretregs; 3528 allocreg(cdb,&retregs,&rx,TYint); 3529 cnop = gennop(null); 3530 int jop = JCXZ; 3531 if (reg != CX) 3532 { 3533 gentstreg(cdb,reg); 3534 jop = JE; 3535 } 3536 genjmp(cdb,jop,FLcode, cast(block *)cnop); // Jop L1 3537 NEWREG(cs.Irm,4); 3538 cdb.gen(&cs); // SHL reg,3 3539 genregs(cdb,0x8C,2,rx); // MOV rx,SS 3540 int isbyte = (mask(reg) & BYTEREGS) == 0; 3541 cdb.genc2(0x80 | isbyte,modregrm(3,4,rx),3); // AND rl,3 3542 cdb.genc2(0x80,modregrm(3,1,rx),4); // OR rl,4 3543 genregs(cdb,0x0A | isbyte,reg,rx); // OR regl,rl 3544 } 3545 else // OPf16p_np 3546 { 3547 /* ROR ereg,16 3548 SHR reg,3 3549 ROL ereg,16 3550 */ 3551 3552 cs.Irm |= modregrm(0,5,0); 3553 cdb.gen(&cs); // SHR reg,3 3554 
cnop = null; 3555 } 3556 } 3557 3558 /************************* 3559 * Generate code for OPbtst 3560 */ 3561 3562 void cdbtst(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 3563 { 3564 regm_t retregs; 3565 reg_t reg; 3566 3567 //printf("cdbtst(e = %p, *pretregs = %s\n", e, regm_str(*pretregs)); 3568 3569 opcode_t op = 0xA3; // BT EA,value 3570 int mode = 4; 3571 3572 elem *e1 = e.EV.E1; 3573 elem *e2 = e.EV.E2; 3574 code cs; 3575 cs.Iflags = 0; 3576 3577 if (*pretregs == 0) // if don't want result 3578 { 3579 codelem(cdb,e1,pretregs,false); // eval left leaf 3580 *pretregs = 0; // in case they got set 3581 codelem(cdb,e2,pretregs,false); 3582 return; 3583 } 3584 3585 regm_t idxregs; 3586 if ((e1.Eoper == OPind && !e1.Ecount) || e1.Eoper == OPvar) 3587 { 3588 getlvalue(cdb, &cs, e1, RMload); // get addressing mode 3589 idxregs = idxregm(&cs); // mask if index regs used 3590 } 3591 else 3592 { 3593 retregs = tysize(e1.Ety) == 1 ? BYTEREGS : allregs; 3594 codelem(cdb,e1, &retregs, false); 3595 reg = findreg(retregs); 3596 cs.Irm = modregrm(3,0,reg & 7); 3597 cs.Iflags = 0; 3598 cs.Irex = 0; 3599 if (reg & 8) 3600 cs.Irex |= REX_B; 3601 idxregs = retregs; 3602 } 3603 3604 tym_t ty1 = tybasic(e1.Ety); 3605 ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0; 3606 3607 // if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100) // should do this instead? 
3608 if (e2.Eoper == OPconst) 3609 { 3610 cs.Iop = 0x0FBA; // BT rm,imm8 3611 cs.Irm |= modregrm(0,mode,0); 3612 cs.Iflags |= CFpsw | word; 3613 cs.IFL2 = FLconst; 3614 if (_tysize[ty1] == SHORTSIZE) 3615 { 3616 cs.IEV2.Vint = e2.EV.Vint & 15; 3617 } 3618 else if (_tysize[ty1] == 4) 3619 { 3620 cs.IEV2.Vint = e2.EV.Vint & 31; 3621 } 3622 else 3623 { 3624 cs.IEV2.Vint = e2.EV.Vint & 63; 3625 if (I64) 3626 cs.Irex |= REX_W; 3627 } 3628 cdb.gen(&cs); 3629 } 3630 else 3631 { 3632 retregs = ALLREGS & ~idxregs; 3633 scodelem(cdb,e2,&retregs,idxregs,true); 3634 reg = findreg(retregs); 3635 3636 cs.Iop = 0x0F00 | op; // BT rm,reg 3637 code_newreg(&cs,reg); 3638 cs.Iflags |= CFpsw | word; 3639 if (I64 && _tysize[ty1] == 8) 3640 cs.Irex |= REX_W; 3641 cdb.gen(&cs); 3642 } 3643 3644 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register 3645 { 3646 if (tysize(e.Ety) == 1) 3647 { 3648 assert(I64 || retregs & BYTEREGS); 3649 allocreg(cdb,&retregs,®,TYint); 3650 cdb.gen2(0x0F92,modregrmx(3,0,reg)); // SETC reg 3651 if (I64 && reg >= 4) 3652 code_orrex(cdb.last(), REX); 3653 *pretregs = retregs; 3654 } 3655 else 3656 { 3657 code *cnop = null; 3658 regm_t save = regcon.immed.mval; 3659 allocreg(cdb,&retregs,®,TYint); 3660 regcon.immed.mval = save; 3661 if ((*pretregs & mPSW) == 0) 3662 { 3663 getregs(cdb,retregs); 3664 genregs(cdb,0x19,reg,reg); // SBB reg,reg 3665 cdb.gen2(0xF7,modregrmx(3,3,reg)); // NEG reg 3666 } 3667 else 3668 { 3669 movregconst(cdb,reg,1,8); // MOV reg,1 3670 cnop = gennop(null); 3671 genjmp(cdb,JC,FLcode, cast(block *) cnop); // Jtrue nop 3672 // MOV reg,0 3673 movregconst(cdb,reg,0,8); 3674 regcon.immed.mval &= ~mask(reg); 3675 } 3676 *pretregs = retregs; 3677 cdb.append(cnop); 3678 } 3679 } 3680 } 3681 3682 /************************* 3683 * Generate code for OPbt, OPbtc, OPbtr, OPbts 3684 */ 3685 3686 void cdbt(ref CodeBuilder cdb,elem *e, regm_t *pretregs) 3687 { 3688 //printf("cdbt(%p, %s)\n", e, regm_str(*pretregs)); 3689 
// Tail of a bit-test code generator whose header lies above this chunk
// (the OPbt/OPbtc/OPbtr/OPbts cases suggest cdbt -- confirm against the full file).
    regm_t retregs;
    reg_t reg;
    opcode_t op;        // low opcode byte of the two-byte "BTx rm,reg" form (used as 0x0F00 | op)
    int mode;           // reg-field digit selecting the operation in the 0x0FBA "BTx rm,imm8" form

    switch (e.Eoper)
    {
        case OPbt:      op = 0xA3; mode = 4; break;
        case OPbtc:     op = 0xBB; mode = 7; break;
        case OPbtr:     op = 0xB3; mode = 6; break;
        case OPbts:     op = 0xAB; mode = 5; break;

        default:
            assert(0);
    }

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    getlvalue(cdb, &cs, e, RMload);      // get addressing mode
    if (e.Eoper == OPbt && *pretregs == 0)
    {
        // Plain bit test whose result is not wanted: only e2 is handed to codelem
        codelem(cdb,e2,pretregs,false);
        return;
    }

    tym_t ty1 = tybasic(e1.Ety);
    tym_t ty2 = tybasic(e2.Ety);
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;
    regm_t idxregs = idxregm(&cs);         // mask if index regs used

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // Split the constant bit index: the whole-operand part is folded into
        // the byte displacement, the bit-within-operand part becomes the immediate.
        if (_tysize[ty1] == SHORTSIZE)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~15) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (_tysize[ty1] == 4)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~31) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~63) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        // Variable bit index: load it into a register that is not an index register of the EA
        retregs = ALLREGS & ~idxregs;
        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (_tysize[ty2] == 8 && I64)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        if (_tysize[e.Ety] == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);            // REX prefix for byte registers numbered 4 and up
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // allocreg's effect on regcon.immed.mval is undone by restoring the saved value
            regm_t save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                // Flags are wanted too: build 0/1 with MOV and a conditional jump
                // (presumably so the carry flag survives -- confirm movregconst's flag argument)
                movregconst(cdb,reg,1,8);      // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}

/*************************************
 * Generate code for OPbsf and OPbsr.
 *
 * Emits one BSF (0x0FBC) or BSR (0x0FBD) instruction. Its source operand is
 * the addressing mode of e.EV.E1 when that is an OPvar or an OPind with
 * Ecount == 0, otherwise a register into which e.EV.E1 was evaluated.
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the OPbsf / OPbsr node; e.EV.E1 is the operand to scan
 *      pretregs = requested result registers; when *pretregs is 0 the operand
 *                 is only passed to codelem and no scan instruction is emitted
 */

void cdbscan(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdbscan()\n");
    //elem_print(e);
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    tym_t tyml = tybasic(e.EV.E1.Ety);
    int sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || sz == 8);
    regm_t retregs;
    reg_t reg;
    code cs;

    if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e.EV.E1, RMload);     // get addressing mode
    }
    else
    {
        // Operand is a general expression: evaluate it into a register and
        // build a register-direct (mod == 3) operand by hand.
        retregs = allregs;
        codelem(cdb,e.EV.E1, &retregs, false);
        reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
    }

    // Choose the destination register; fall back to any register in allregs
    retregs = *pretregs & allregs;
    if  (!retregs)
        retregs = allregs;
    allocreg(cdb,&retregs, &reg, e.Ety);

    cs.Iop = (e.Eoper == OPbsf) ? 0x0FBC : 0x0FBD;        // BSF/BSR reg,EA
    code_newreg(&cs, reg);
    if (!I16 && sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);

    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * OPpopcnt operator
 *
 * Emits one POPCNT instruction. The source operand is chosen the same way as
 * in cdbscan: the addressing mode of e.EV.E1 for an OPvar or an OPind with
 * Ecount == 0, otherwise a register holding the evaluated operand.
 * Not available for 16 bit code (asserted).
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the OPpopcnt node; e.EV.E1 is the operand whose bits are counted
 *      pretregs = requested result registers; when *pretregs is 0 the operand
 *                 is only passed to codelem and no POPCNT is emitted
 */

void cdpopcnt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpopcnt()\n");
    //elem_print(e);
    assert(!I16);
    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    tym_t tyml = tybasic(e.EV.E1.Ety);

    int sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || (sz == 8 && I64));     // no byte op

    code cs;
    if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e.EV.E1, RMload);     // get addressing mode
    }
    else
    {
        // Evaluate the operand into a register and describe it as a
        // register-direct (mod == 3) operand.
        regm_t retregs = allregs;
        codelem(cdb,e.EV.E1, &retregs, false);
        reg_t reg = cast(ubyte)findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
    }

    // Choose the destination register; fall back to any register in allregs
    regm_t retregs = *pretregs & allregs;
    if  (!retregs)
        retregs = allregs;
    reg_t reg;
    allocreg(cdb,&retregs, &reg, e.Ety);

    cs.Iop = POPCNT;            // POPCNT reg,EA
    code_newreg(&cs, reg);
    if (sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    if (*pretregs & mPSW)
        cs.Iflags |= CFpsw;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);
    *pretregs &= mBP | ALLREGS;             // flags already set

    fixresult(cdb,e,retregs,pretregs);
}


/*******************************************
 * Generate code for OPpair, OPrpair.
*/

void cdpair(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    if (*pretregs == 0)                         // if don't want result
    {
        codelem(cdb,e.EV.E1,pretregs,false);     // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e.EV.E2,pretregs,false);
        return;
    }

    //printf("\ncdpair(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //printf("Ecount = %d\n", e.Ecount);

    regm_t retregs = *pretregs;
    // Only flags requested for a complex value with inline 8087 code:
    // pick a floating point register pair to hold the value.
    if (retregs == mPSW && tycomplex(e.Ety) && config.inline8087)
    {
        if (config.fpxmmregs)
            retregs |= mXMM0 | mXMM1;
        else
            retregs |= mST01;
    }

    if (retregs & mST01)
    {
        loadPair87(cdb, e, pretregs);
        return;
    }

    // regs1 / regs2 are the targets for e.EV.E1 / e.EV.E2 respectively
    regm_t regs1;
    regm_t regs2;
    if (retregs & XMMREGS)
    {
        // Two distinct XMM registers taken from the requested set
        retregs &= XMMREGS;
        const reg = findreg(retregs);
        regs1 = mask(reg);
        regs2 = mask(findreg(retregs & ~regs1));
    }
    else
    {
        // General registers: first operand in the mLSW half, second in the mMSW half
        retregs &= allregs;
        if  (!retregs)
            retregs = allregs;
        regs1 = retregs & mLSW;
        regs2 = retregs & mMSW;
    }
    if (e.Eoper == OPrpair)
    {
        // swap
        regs1 ^= regs2;
        regs2 ^= regs1;
        regs1 ^= regs2;
    }
    //printf("1: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));

    codelem(cdb,e.EV.E1, &regs1, false);
    scodelem(cdb,e.EV.E2, &regs2, regs1, false);    // regs1 is passed as the set to keep while evaluating E2
    //printf("2: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));

    if (e.EV.E1.Ecount)
        getregs(cdb,regs1);
    if (e.EV.E2.Ecount)
        getregs(cdb,regs2);

    fixresult(cdb,e,regs1 | regs2,pretregs);
}

/*************************
 * Generate code for OPcmpxchg
 *
 * Two code shapes are produced:
 *  - 8 byte operand in 32 bit code: CMPXCHG8B (0x0FC7) with the old value in
 *    DX:AX and the new value in CX:BX;
 *  - everything else: CMPXCHG (0x0FB1, or 0x0FB0 for a byte operand) with the
 *    old value in AX and the new value in some other general register.
 * A LOCK prefix is emitted when the lvalue's type carries mTYvolatile.
 * If a register result is requested it is produced with SETZ, and the result
 * type must be one byte wide (asserted).
 * Params:
 *      cdb = code builder receiving the generated instructions
 *      e = the OPcmpxchg node, shaped as shown in the diagram inside the function
 *      pretregs = requested result registers; on return, when a register
 *                 result was requested, the register actually used
 */

void cdcmpxchg(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* The form is:
     *     OPcmpxchg
     *    /     \
     * lvalue   OPparam
     *          /     \
     *        old     new
     */

    //printf("cdcmpxchg(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);
    assert(!e2.Ecount);

    tym_t tyml = tybasic(e1.Ety);                   // type of lvalue
    uint sz = _tysize[tyml];

    if (I32 && sz == 8)
    {
        regm_t retregs = mDX|mAX;
        codelem(cdb,e2.EV.E1,&retregs,false);          // [DX,AX] = e2.EV.E1

        retregs = mCX|mBX;
        scodelem(cdb,e2.EV.E2,&retregs,mDX|mAX,false);  // [CX,BX] = e2.EV.E2

        code cs;
        getlvalue(cdb,&cs,e1,mCX|mBX|mAX|mDX);        // get EA

        getregs(cdb,mDX|mAX);                 // CMPXCHG destroys these regs

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                      // LOCK prefix
        cs.Iop = 0x0FC7;                        // CMPXCHG8B EA
        cs.Iflags |= CFpsw;
        code_newreg(&cs,1);                     // reg field 1 selects CMPXCHG8B within opcode 0x0FC7
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }
    else
    {
        uint isbyte = (sz == 1);            // 1 for byte operation
        ubyte word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
        uint rex = (I64 && sz == 8) ? REX_W : 0;

        regm_t retregs = mAX;
        codelem(cdb,e2.EV.E1,&retregs,false);       // AX = e2.EV.E1

        retregs = (ALLREGS | mBP) & ~mAX;
        scodelem(cdb,e2.EV.E2,&retregs,mAX,false);  // load rvalue in reg

        code cs;
        getlvalue(cdb,&cs,e1,mAX | retregs); // get EA

        getregs(cdb,mAX);                  // CMPXCHG destroys AX

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                      // LOCK prefix
        cs.Iop = 0x0FB1 ^ isbyte;                    // CMPXCHG EA,reg
        cs.Iflags |= CFpsw | word;
        cs.Irex |= rex;
        reg_t reg = findreg(retregs);
        code_newreg(&cs,reg);
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }

    regm_t retregs;
    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        assert(tysize(e.Ety) == 1);
        assert(I64 || retregs & BYTEREGS);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        uint ea = modregrmx(3,0,reg);
        if (I64 && reg >= 4)
            ea |= REX << 16;        // REX prefix for byte registers numbered 4 and up
        cdb.gen2(0x0F94,ea);        // SETZ reg
        *pretregs = retregs;
    }
}

/*************************
 * Generate code for OPprefetch
 *
 * Params:
 *      cdb = code builder receiving the generated instruction
 *      e = the OPprefetch node; e.EV.E1 supplies the address and e.EV.E2 must
 *          be an OPconst in the range 0..5 selecting the prefetch variant
 *      pretregs = must point to 0 (asserted); no result is produced
 */

void cdprefetch(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate the following based on e2:
     *    0: prefetch0
     *    1: prefetch1
     *    2: prefetch2
     *    3: prefetchnta
     *    4: prefetchw
     *    5: prefetchwt1
     */
    //printf("cdprefetch\n");
    elem *e1 = e.EV.E1;

    assert(*pretregs == 0);
    assert(e.EV.E2.Eoper == OPconst);
    opcode_t op;
    reg_t reg;              // not a machine register: the reg-field digit of the ModR/M byte
    switch (e.EV.E2.EV.Vuns)
    {
        case 0: op = PREFETCH; reg = 1; break;  // PREFETCH0
        case 1: op = PREFETCH; reg = 2; break;  // PREFETCH1
        case 2: op = PREFETCH; reg = 3; break;  // PREFETCH2
        case 3: op = PREFETCH; reg = 0; break;  // PREFETCHNTA
        case 4: op = 0x0F0D;   reg = 1; break;  // PREFETCHW
        case 5: op = 0x0F0D;   reg = 2; break;  // PREFETCHWT1
        default: assert(0);
    }

    freenode(e.EV.E2);

    code cs;
    getlvalue(cdb,&cs,e1,0);
    cs.Iop = op;
    cs.Irm |= modregrm(0,reg,0);
    cs.Iflags |= CFvolatile;            // do not schedule
    cdb.gen(&cs);
}

}