/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Mostly code generation for assignment operators.
 *
 * Copyright:   Copyright (C) 1985-1998 by Symantec
 *              Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod4.d, backend/cod4.d)
 * Documentation:  https://dlang.org/phobos/dmd_backend_cod4.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod4.d
 */

module dmd.backend.cod4;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.global;
import dmd.backend.oper;
import dmd.backend.ty;
import dmd.backend.evalu8 : el_toldoubled;
import dmd.backend.xmm;

extern (C++):

nothrow:

int REGSIZE();

extern __gshared CGstate cgstate;
extern __gshared bool[FLMAX] datafl;

/// Returns: a mask with only bit `m` set.
private extern (D) uint mask(uint m) { return 1 << m; }

// Register lookup table used when storing a 16-bit double register by register
// (see the `reg = dblreg[reg]` step in cdeq).  Columns are indexed by:
                          /*   AX,CX,DX,BX                      */
__gshared const reg_t[4] dblreg = [ BX,DX,NOREG,CX ];

// from divcoeff.c
extern (C)
{
    bool choose_multiplier(int N, ulong d, int prec, ulong *pm, int *pshpost);
    bool udiv_coefficients(int N, ulong d, int *pshpre, ulong *pm, int *pshpost);
}

/*******************************
 * Count the occurrences of symbol s in expression tree e.
 * Only OPvar leaves whose Vsym is s are counted.
 * Params:
 *      s = symbol to look for
 *      e = root of the tree to search
 * Returns:
 *      number of OPvar leaves in e that refer to s
 */

private int intree(Symbol *s,elem *e)
{
    // Leaf: contributes 1 if it is a reference to s, else 0
    if (OTleaf(e.Eoper))
        return (e.Eoper == OPvar && e.EV.Vsym == s) ? 1 : 0;

    // Interior node: E1 always exists, E2 only for binary operators
    int count = intree(s,e.EV.E1);
    if (OTbinary(e.Eoper))
        count += intree(s,e.EV.E2);
    return count;
}

/***********************************
 * Decide whether expression e may be computed directly into the
 * register variable s.
 * Care is needed for things like x=x+x+x, and x=a+x.
 * Params:
 *      s = the register variable being assigned to
 *      e = the expression being assigned
 * Returns:
 *      1       e can be evaluated directly into s
 *      0       it cannot
 */

int doinreg(Symbol *s, elem *e)
{
    while (true)
    {
        const OPER op = e.Eoper;

        // These forms are always acceptable, regardless of how s is used
        if (op == OPind || OTcall(op) || OTleaf(op))
            return 1;

        const int uses = intree(s,e);
        if (uses == 0 ||
            (OTunary(op) && OTleaf(e.EV.E1.Eoper)))
            return 1;

        // More than one reference to s: give up
        if (uses != 1)
            return 0;

        // Exactly one reference: acceptable only if it is reached purely
        // through the left operands of these binary operators
        bool descend;
        switch (op)
        {
            case OPadd:
            case OPmin:
            case OPand:
            case OPor:
            case OPxor:
            case OPshl:
            case OPmul:
                descend = true;
                break;

            default:
                descend = false;
                break;
        }
        if (!descend || intree(s,e.EV.E2))
            return 0;

        e = e.EV.E1;            // the single reference is in the left subtree
    }
}

/****************************
 * Append to cdb the code for saving common subexpressions if the EA
 * described by c turns out to be a register.
 * This is called just before modifying an EA.
 * Params:
 *      cdb = code sink that getregs() appends to
 *      c = instruction whose mod/rm (and REX_B) describe the EA
 */

void modEA(ref CodeBuilder cdb,code *c)
{
    // Nothing to do unless the addressing mode refers to a register
    if ((c.Irm & 0xC0) != 0xC0)
        return;

    reg_t reg = c.Irm & 7;
    if (c.Irex & REX_B)
    {
        // REX_B extends the r/m field to registers 8..15
        reg |= 8;
        assert(I64);
    }
    getregs(cdb,mask(reg));
}


/****************************
 * Gen code for op= for doubles.
145 */ 146 147 private void opassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs,OPER op) 148 { 149 assert(config.exe & EX_windos); // for targets that may not have an 8087 150 151 static immutable uint[OPdivass - OPpostinc + 1] clibtab = 152 /* OPpostinc,OPpostdec,OPeq,OPaddass,OPminass,OPmulass,OPdivass */ 153 [ CLIB.dadd, CLIB.dsub, cast(uint)-1, CLIB.dadd,CLIB.dsub,CLIB.dmul,CLIB.ddiv ]; 154 155 if (config.inline8087) 156 { 157 opass87(cdb,e,pretregs); 158 return; 159 } 160 161 code cs; 162 regm_t retregs2,retregs,idxregs; 163 164 uint clib = clibtab[op - OPpostinc]; 165 elem *e1 = e.EV.E1; 166 tym_t tym = tybasic(e1.Ety); 167 getlvalue(cdb,&cs,e1,DOUBLEREGS | mBX | mCX); 168 169 if (tym == TYfloat) 170 { 171 clib += CLIB.fadd - CLIB.dadd; /* convert to float operation */ 172 173 // Load EA into FLOATREGS 174 getregs(cdb,FLOATREGS); 175 cs.Iop = LOD; 176 cs.Irm |= modregrm(0,AX,0); 177 cdb.gen(&cs); 178 179 if (!I32) 180 { 181 cs.Irm |= modregrm(0,DX,0); 182 getlvalue_msw(&cs); 183 cdb.gen(&cs); 184 getlvalue_lsw(&cs); 185 186 } 187 retregs2 = FLOATREGS2; 188 idxregs = FLOATREGS | idxregm(&cs); 189 retregs = FLOATREGS; 190 } 191 else 192 { 193 if (I32) 194 { 195 // Load EA into DOUBLEREGS 196 getregs(cdb,DOUBLEREGS_32); 197 cs.Iop = LOD; 198 cs.Irm |= modregrm(0,AX,0); 199 cdb.gen(&cs); 200 cs.Irm |= modregrm(0,DX,0); 201 getlvalue_msw(&cs); 202 cdb.gen(&cs); 203 getlvalue_lsw(&cs); 204 205 retregs2 = DOUBLEREGS2_32; 206 idxregs = DOUBLEREGS_32 | idxregm(&cs); 207 } 208 else 209 { 210 // Push EA onto stack 211 cs.Iop = 0xFF; 212 cs.Irm |= modregrm(0,6,0); 213 cs.IEV1.Voffset += DOUBLESIZE - REGSIZE; 214 cdb.gen(&cs); 215 getlvalue_lsw(&cs); 216 cdb.gen(&cs); 217 getlvalue_lsw(&cs); 218 cdb.gen(&cs); 219 getlvalue_lsw(&cs); 220 cdb.gen(&cs); 221 stackpush += DOUBLESIZE; 222 223 retregs2 = DOUBLEREGS_16; 224 idxregs = idxregm(&cs); 225 } 226 retregs = DOUBLEREGS; 227 } 228 229 if ((cs.Iflags & CFSEG) == CFes) 230 idxregs |= mES; 231 cgstate.stackclean++; 232 
scodelem(cdb,e.EV.E2,&retregs2,idxregs,false); 233 cgstate.stackclean--; 234 callclib(cdb,e,clib,&retregs,0); 235 if (e1.Ecount) 236 cssave(e1,retregs,!OTleaf(e1.Eoper)); // if lvalue is a CSE 237 freenode(e1); 238 cs.Iop = STO; // MOV EA,DOUBLEREGS 239 fltregs(cdb,&cs,tym); 240 fixresult(cdb,e,retregs,pretregs); 241 } 242 243 /**************************** 244 * Gen code for OPnegass for doubles. 245 */ 246 247 private void opnegassdbl(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 248 { 249 assert(config.exe & EX_windos); // for targets that may not have an 8087 250 251 if (config.inline8087) 252 { 253 cdnegass87(cdb,e,pretregs); 254 return; 255 } 256 elem *e1 = e.EV.E1; 257 tym_t tym = tybasic(e1.Ety); 258 int sz = _tysize[tym]; 259 code cs; 260 261 getlvalue(cdb,&cs,e1,*pretregs ? DOUBLEREGS | mBX | mCX : 0); 262 modEA(cdb,&cs); 263 cs.Irm |= modregrm(0,6,0); 264 cs.Iop = 0x80; 265 cs.IEV1.Voffset += sz - 1; 266 cs.IFL2 = FLconst; 267 cs.IEV2.Vuns = 0x80; 268 cdb.gen(&cs); // XOR 7[EA],0x80 269 if (tycomplex(tym)) 270 { 271 cs.IEV1.Voffset -= sz / 2; 272 cdb.gen(&cs); // XOR 7[EA],0x80 273 } 274 275 regm_t retregs; 276 if (*pretregs || e1.Ecount) 277 { 278 cs.IEV1.Voffset -= sz - 1; 279 280 if (tym == TYfloat) 281 { 282 // Load EA into FLOATREGS 283 getregs(cdb,FLOATREGS); 284 cs.Iop = LOD; 285 NEWREG(cs.Irm, AX); 286 cdb.gen(&cs); 287 288 if (!I32) 289 { 290 NEWREG(cs.Irm, DX); 291 getlvalue_msw(&cs); 292 cdb.gen(&cs); 293 getlvalue_lsw(&cs); 294 295 } 296 retregs = FLOATREGS; 297 } 298 else 299 { 300 if (I32) 301 { 302 // Load EA into DOUBLEREGS 303 getregs(cdb,DOUBLEREGS_32); 304 cs.Iop = LOD; 305 cs.Irm &= ~cast(uint)modregrm(0,7,0); 306 cs.Irm |= modregrm(0,AX,0); 307 cdb.gen(&cs); 308 cs.Irm |= modregrm(0,DX,0); 309 getlvalue_msw(&cs); 310 cdb.gen(&cs); 311 getlvalue_lsw(&cs); 312 } 313 else 314 { 315 static if (1) 316 { 317 cs.Iop = LOD; 318 fltregs(cdb,&cs,TYdouble); // MOV DOUBLEREGS, EA 319 } 320 else 321 { 322 // Push EA onto stack 323 cs.Iop = 0xFF; 
324 cs.Irm |= modregrm(0,6,0); 325 cs.IEV1.Voffset += DOUBLESIZE - REGSIZE; 326 cdb.gen(&cs); 327 cs.IEV1.Voffset -= REGSIZE; 328 cdb.gen(&cs); 329 cs.IEV1.Voffset -= REGSIZE; 330 cdb.gen(&cs); 331 cs.IEV1.Voffset -= REGSIZE; 332 cdb.gen(&cs); 333 stackpush += DOUBLESIZE; 334 } 335 } 336 retregs = DOUBLEREGS; 337 } 338 if (e1.Ecount) 339 cssave(e1,retregs,!OTleaf(e1.Eoper)); /* if lvalue is a CSE */ 340 } 341 else 342 { 343 retregs = 0; 344 assert(e1.Ecount == 0); 345 } 346 347 freenode(e1); 348 fixresult(cdb,e,retregs,pretregs); 349 } 350 351 352 353 /************************ 354 * Generate code for an assignment. 355 */ 356 357 void cdeq(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 358 { 359 tym_t tymll; 360 reg_t reg; 361 code cs; 362 elem *e11; 363 bool regvar; // true means evaluate into register variable 364 regm_t varregm; 365 reg_t varreg; 366 targ_int postinc; 367 368 //printf("cdeq(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs)); 369 elem *e1 = e.EV.E1; 370 elem *e2 = e.EV.E2; 371 int e2oper = e2.Eoper; 372 tym_t tyml = tybasic(e1.Ety); // type of lvalue 373 regm_t retregs = *pretregs; 374 375 if (tyxmmreg(tyml) && config.fpxmmregs) 376 { 377 xmmeq(cdb, e, CMP, e1, e2, pretregs); 378 return; 379 } 380 381 if (tyfloating(tyml) && config.inline8087) 382 { 383 if (tycomplex(tyml)) 384 { 385 complex_eq87(cdb, e, pretregs); 386 return; 387 } 388 389 if (!(retregs == 0 && 390 (e2oper == OPconst || e2oper == OPvar || e2oper == OPind)) 391 ) 392 { 393 eq87(cdb,e,pretregs); 394 return; 395 } 396 if (config.target_cpu >= TARGET_PentiumPro && 397 (e2oper == OPvar || e2oper == OPind) 398 ) 399 { 400 eq87(cdb,e,pretregs); 401 return; 402 } 403 if (tyml == TYldouble || tyml == TYildouble) 404 { 405 eq87(cdb,e,pretregs); 406 return; 407 } 408 } 409 410 uint sz = _tysize[tyml]; // # of bytes to transfer 411 assert(cast(int)sz > 0); 412 413 if (retregs == 0) // if no return value 414 { 415 int fl; 416 417 /* If registers are tight, and we might need them for the 
lvalue, 418 * prefer to not use them for the rvalue 419 */ 420 bool plenty = true; 421 if (e1.Eoper == OPind) 422 { 423 /* Will need 1 register for evaluation, +2 registers for 424 * e1's addressing mode 425 */ 426 regm_t m = allregs & ~regcon.mvar; // mask of non-register variables 427 m &= m - 1; // clear least significant bit 428 m &= m - 1; // clear least significant bit 429 plenty = m != 0; // at least 3 registers 430 } 431 432 if ((e2oper == OPconst || // if rvalue is a constant 433 e2oper == OPrelconst && 434 !(I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) && 435 ((fl = el_fl(e2)) == FLdata || 436 fl==FLudata || fl == FLextern) 437 && !(e2.EV.Vsym.ty() & mTYcs) 438 ) && 439 !(evalinregister(e2) && plenty) && 440 !e1.Ecount) // and no CSE headaches 441 { 442 // Look for special case of (*p++ = ...), where p is a register variable 443 if (e1.Eoper == OPind && 444 ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) && 445 e11.EV.E1.Eoper == OPvar && 446 e11.EV.E1.EV.Vsym.Sfl == FLreg && 447 (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS) 448 ) 449 { 450 Symbol *s = e11.EV.E1.EV.Vsym; 451 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 452 { 453 regcon.params &= ~s.Spregm(); 454 } 455 postinc = e11.EV.E2.EV.Vint; 456 if (e11.Eoper == OPpostdec) 457 postinc = -postinc; 458 getlvalue(cdb,&cs,e1,RMstore); 459 freenode(e11.EV.E2); 460 } 461 else 462 { 463 postinc = 0; 464 getlvalue(cdb,&cs,e1,RMstore); 465 466 if (e2oper == OPconst && 467 config.flags4 & CFG4speed && 468 (config.target_cpu == TARGET_Pentium || 469 config.target_cpu == TARGET_PentiumMMX) && 470 (cs.Irm & 0xC0) == 0x80 471 ) 472 { 473 if (I64 && sz == 8 && e2.EV.Vpointer) 474 { 475 // MOV reg,imm64 476 // MOV EA,reg 477 regm_t rregm = allregs & ~idxregm(&cs); 478 reg_t regx; 479 regwithvalue(cdb,rregm,e2.EV.Vpointer,®x,64); 480 cs.Iop = STO; 481 cs.Irm |= modregrm(0,regx & 7,0); 482 if (regx & 8) 483 cs.Irex |= REX_R; 484 cdb.gen(&cs); 485 freenode(e2); 486 goto Lp; 487 } 
488 if ((sz == REGSIZE || (I64 && sz == 4)) && e2.EV.Vint) 489 { 490 // MOV reg,imm 491 // MOV EA,reg 492 regm_t rregm = allregs & ~idxregm(&cs); 493 reg_t regx; 494 regwithvalue(cdb,rregm,e2.EV.Vint,®x,0); 495 cs.Iop = STO; 496 cs.Irm |= modregrm(0,regx & 7,0); 497 if (regx & 8) 498 cs.Irex |= REX_R; 499 cdb.gen(&cs); 500 freenode(e2); 501 goto Lp; 502 } 503 if (sz == 2 * REGSIZE && e2.EV.Vllong == 0) 504 { 505 // MOV reg,imm 506 // MOV EA,reg 507 // MOV EA+2,reg 508 regm_t rregm = getscratch() & ~idxregm(&cs); 509 if (rregm) 510 { 511 reg_t regx; 512 regwithvalue(cdb,rregm,e2.EV.Vint,®x,0); 513 cs.Iop = STO; 514 cs.Irm |= modregrm(0,regx,0); 515 cdb.gen(&cs); 516 getlvalue_msw(&cs); 517 cdb.gen(&cs); 518 freenode(e2); 519 goto Lp; 520 } 521 } 522 } 523 } 524 525 // If loading result into a register 526 if ((cs.Irm & 0xC0) == 0xC0) 527 { 528 modEA(cdb,&cs); 529 if (sz == 2 * REGSIZE && cs.IFL1 == FLreg) 530 getregs(cdb,cs.IEV1.Vsym.Sregm); 531 } 532 cs.Iop = (sz == 1) ? 0xC6 : 0xC7; 533 534 if (e2oper == OPrelconst) 535 { 536 cs.IEV2.Voffset = e2.EV.Voffset; 537 cs.IFL2 = cast(ubyte)fl; 538 cs.IEV2.Vsym = e2.EV.Vsym; 539 cs.Iflags |= CFoff; 540 cdb.gen(&cs); // MOV EA,&variable 541 if (I64 && sz == 8) 542 code_orrex(cdb.last(), REX_W); 543 if (sz > REGSIZE) 544 { 545 cs.Iop = 0x8C; 546 getlvalue_msw(&cs); 547 cs.Irm |= modregrm(0,3,0); 548 cdb.gen(&cs); // MOV EA+2,DS 549 } 550 } 551 else 552 { 553 assert(e2oper == OPconst); 554 cs.IFL2 = FLconst; 555 targ_size_t *p = cast(targ_size_t *) &(e2.EV); 556 cs.IEV2.Vsize_t = *p; 557 // Look for loading a register variable 558 if ((cs.Irm & 0xC0) == 0xC0) 559 { 560 reg_t regx = cs.Irm & 7; 561 562 if (cs.Irex & REX_B) 563 regx |= 8; 564 if (I64 && sz == 8) 565 movregconst(cdb,regx,*p,64); 566 else 567 movregconst(cdb,regx,*p,1 ^ (cs.Iop & 1)); 568 if (sz == 2 * REGSIZE) 569 { getlvalue_msw(&cs); 570 if (REGSIZE == 2) 571 movregconst(cdb,cs.Irm & 7,(cast(ushort *)p)[1],0); 572 else if (REGSIZE == 4) 573 
movregconst(cdb,cs.Irm & 7,(cast(uint *)p)[1],0); 574 else if (REGSIZE == 8) 575 movregconst(cdb,cs.Irm & 7,p[1],0); 576 else 577 assert(0); 578 } 579 } 580 else if (I64 && sz == 8 && *p >= 0x80000000) 581 { // Use 64 bit MOV, as the 32 bit one gets sign extended 582 // MOV reg,imm64 583 // MOV EA,reg 584 regm_t rregm = allregs & ~idxregm(&cs); 585 reg_t regx; 586 regwithvalue(cdb,rregm,*p,®x,64); 587 cs.Iop = STO; 588 cs.Irm |= modregrm(0,regx & 7,0); 589 if (regx & 8) 590 cs.Irex |= REX_R; 591 cdb.gen(&cs); 592 } 593 else 594 { 595 int off = sz; 596 do 597 { int regsize = REGSIZE; 598 if (off >= 4 && I16 && config.target_cpu >= TARGET_80386) 599 { 600 regsize = 4; 601 cs.Iflags |= CFopsize; // use opsize to do 32 bit operation 602 } 603 else if (I64 && sz == 16 && *p >= 0x80000000) 604 { 605 regm_t rregm = allregs & ~idxregm(&cs); 606 reg_t regx; 607 regwithvalue(cdb,rregm,*p,®x,64); 608 cs.Iop = STO; 609 cs.Irm |= modregrm(0,regx & 7,0); 610 if (regx & 8) 611 cs.Irex |= REX_R; 612 } 613 else 614 { 615 regm_t retregsx = (sz == 1) ? 
BYTEREGS : allregs; 616 reg_t regx; 617 if (reghasvalue(retregsx,*p,®x)) 618 { 619 cs.Iop = (cs.Iop & 1) | 0x88; 620 cs.Irm |= modregrm(0,regx & 7,0); // MOV EA,regx 621 if (regx & 8) 622 cs.Irex |= REX_R; 623 if (I64 && sz == 1 && regx >= 4) 624 cs.Irex |= REX; 625 } 626 if (!I16 && off == 2) // if 16 bit operand 627 cs.Iflags |= CFopsize; 628 if (I64 && sz == 8) 629 cs.Irex |= REX_W; 630 } 631 cdb.gen(&cs); // MOV EA,const 632 633 p = cast(targ_size_t *)(cast(char *) p + regsize); 634 cs.Iop = (cs.Iop & 1) | 0xC6; 635 cs.Irm &= cast(ubyte)~cast(int)modregrm(0,7,0); 636 cs.Irex &= ~REX_R; 637 cs.IEV1.Voffset += regsize; 638 cs.IEV2.Vint = cast(int)*p; 639 off -= regsize; 640 } while (off > 0); 641 } 642 } 643 freenode(e2); 644 goto Lp; 645 } 646 retregs = allregs; // pick a reg, any reg 647 if (sz == 2 * REGSIZE) 648 retregs &= ~mBP; // BP cannot be used for register pair 649 } 650 if (retregs == mPSW) 651 { 652 retregs = allregs; 653 if (sz == 2 * REGSIZE) 654 retregs &= ~mBP; // BP cannot be used for register pair 655 } 656 cs.Iop = STO; 657 if (sz == 1) // must have byte regs 658 { 659 cs.Iop = 0x88; 660 retregs &= BYTEREGS; 661 if (!retregs) 662 retregs = BYTEREGS; 663 } 664 else if (retregs & mES && 665 ( 666 (e1.Eoper == OPind && 667 ((tymll = tybasic(e1.EV.E1.Ety)) == TYfptr || tymll == TYhptr)) || 668 (e1.Eoper == OPvar && e1.EV.Vsym.Sfl == FLfardata) 669 ) 670 ) 671 // getlvalue() needs ES, so we can't return it 672 retregs = allregs; // no conflicts with ES 673 else if (tyml == TYdouble || tyml == TYdouble_alias || retregs & mST0) 674 retregs = DOUBLEREGS; 675 676 regvar = false; 677 varregm = 0; 678 if (config.flags4 & CFG4optimized) 679 { 680 // Be careful of cases like (x = x+x+x). We cannot evaluate in 681 // x if x is in a register. 
682 if (isregvar(e1,&varregm,&varreg) && // if lvalue is register variable 683 doinreg(e1.EV.Vsym,e2) && // and we can compute directly into it 684 !(sz == 1 && e1.EV.Voffset == 1) 685 ) 686 { 687 if (varregm & XMMREGS) 688 { 689 // Could be an integer vector in the XMMREGS 690 xmmeq(cdb, e, CMP, e1, e2, pretregs); 691 return; 692 } 693 regvar = true; 694 retregs = varregm; 695 reg = varreg; // evaluate directly in target register 696 if (tysize(e1.Ety) == REGSIZE && 697 tysize(e1.EV.Vsym.Stype.Tty) == 2 * REGSIZE) 698 { 699 if (e1.EV.Voffset) 700 retregs &= mMSW; 701 else 702 retregs &= mLSW; 703 reg = findreg(retregs); 704 } 705 } 706 } 707 if (*pretregs & mPSW && OTleaf(e1.Eoper)) // if evaluating e1 couldn't change flags 708 { // Be careful that this lines up with jmpopcode() 709 retregs |= mPSW; 710 *pretregs &= ~mPSW; 711 } 712 scodelem(cdb,e2,&retregs,0,true); // get rvalue 713 714 // Look for special case of (*p++ = ...), where p is a register variable 715 if (e1.Eoper == OPind && 716 ((e11 = e1.EV.E1).Eoper == OPpostinc || e11.Eoper == OPpostdec) && 717 e11.EV.E1.Eoper == OPvar && 718 e11.EV.E1.EV.Vsym.Sfl == FLreg && 719 (!I16 || e11.EV.E1.EV.Vsym.Sregm & IDXREGS) 720 ) 721 { 722 Symbol *s = e11.EV.E1.EV.Vsym; 723 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 724 { 725 regcon.params &= ~s.Spregm(); 726 } 727 728 postinc = e11.EV.E2.EV.Vint; 729 if (e11.Eoper == OPpostdec) 730 postinc = -postinc; 731 getlvalue(cdb,&cs,e1,RMstore | retregs); 732 freenode(e11.EV.E2); 733 } 734 else 735 { 736 postinc = 0; 737 getlvalue(cdb,&cs,e1,RMstore | retregs); // get lvalue (cl == null if regvar) 738 } 739 740 getregs(cdb,varregm); 741 742 assert(!(retregs & mES && (cs.Iflags & CFSEG) == CFes)); 743 if ((tyml == TYfptr || tyml == TYhptr) && retregs & mES) 744 { 745 reg = findreglsw(retregs); 746 cs.Irm |= modregrm(0,reg,0); 747 cdb.gen(&cs); // MOV EA,reg 748 getlvalue_msw(&cs); // point to where segment goes 749 cs.Iop = 0x8C; 750 NEWREG(cs.Irm,0); 751 
cdb.gen(&cs); // MOV EA+2,ES 752 } 753 else 754 { 755 if (!I16) 756 { 757 reg = findreg(retregs & 758 ((sz > REGSIZE) ? mBP | mLSW : mBP | ALLREGS)); 759 cs.Irm |= modregrm(0,reg & 7,0); 760 if (reg & 8) 761 cs.Irex |= REX_R; 762 for (; true; sz -= REGSIZE) 763 { 764 // Do not generate mov from register onto itself 765 if (regvar && reg == ((cs.Irm & 7) | (cs.Irex & REX_B ? 8 : 0))) 766 break; 767 if (sz == 2) // if 16 bit operand 768 cs.Iflags |= CFopsize; 769 else if (sz == 1 && reg >= 4) 770 cs.Irex |= REX; 771 cdb.gen(&cs); // MOV EA+offset,reg 772 if (sz <= REGSIZE) 773 break; 774 getlvalue_msw(&cs); 775 reg = findregmsw(retregs); 776 code_newreg(&cs, reg); 777 } 778 } 779 else 780 { 781 if (sz > REGSIZE) 782 cs.IEV1.Voffset += sz - REGSIZE; // 0,2,6 783 reg = findreg(retregs & 784 (sz > REGSIZE ? mMSW : ALLREGS)); 785 if (tyml == TYdouble || tyml == TYdouble_alias) 786 reg = AX; 787 cs.Irm |= modregrm(0,reg,0); 788 // Do not generate mov from register onto itself 789 if (!regvar || reg != (cs.Irm & 7)) 790 for (; true; sz -= REGSIZE) // 1,2,4 791 { 792 cdb.gen(&cs); // MOV EA+offset,reg 793 if (sz <= REGSIZE) 794 break; 795 cs.IEV1.Voffset -= REGSIZE; 796 if (tyml == TYdouble || tyml == TYdouble_alias) 797 reg = dblreg[reg]; 798 else 799 reg = findreglsw(retregs); 800 NEWREG(cs.Irm,reg); 801 } 802 } 803 } 804 if (e1.Ecount || // if lvalue is a CSE or 805 regvar) // rvalue can't be a CSE 806 { 807 getregs_imm(cdb,retregs); // necessary if both lvalue and 808 // rvalue are CSEs (since a reg 809 // can hold only one e at a time) 810 cssave(e1,retregs,!OTleaf(e1.Eoper)); // if lvalue is a CSE 811 } 812 813 fixresult(cdb,e,retregs,pretregs); 814 Lp: 815 if (postinc) 816 { 817 reg_t ireg = findreg(idxregm(&cs)); 818 if (*pretregs & mPSW) 819 { // Use LEA to avoid touching the flags 820 uint rm = cs.Irm & 7; 821 if (cs.Irex & REX_B) 822 rm |= 8; 823 cdb.genc1(LEA,buildModregrm(2,ireg,rm),FLconst,postinc); 824 if (tysize(e11.EV.E1.Ety) == 8) 825 
code_orrex(cdb.last(), REX_W); 826 } 827 else if (I64) 828 { 829 cdb.genc2(0x81,modregrmx(3,0,ireg),postinc); 830 if (tysize(e11.EV.E1.Ety) == 8) 831 code_orrex(cdb.last(), REX_W); 832 } 833 else 834 { 835 if (postinc == 1) 836 cdb.gen1(0x40 + ireg); // INC ireg 837 else if (postinc == -cast(targ_int)1) 838 cdb.gen1(0x48 + ireg); // DEC ireg 839 else 840 { 841 cdb.genc2(0x81,modregrm(3,0,ireg),postinc); 842 } 843 } 844 } 845 freenode(e1); 846 } 847 848 849 /************************ 850 * Generate code for += -= &= |= ^= negass 851 */ 852 853 void cdaddass(ref CodeBuilder cdb,elem *e,regm_t *pretregs) 854 { 855 //printf("cdaddass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs)); 856 OPER op = e.Eoper; 857 regm_t retregs = 0; 858 uint reverse = 0; 859 elem *e1 = e.EV.E1; 860 tym_t tyml = tybasic(e1.Ety); // type of lvalue 861 int sz = _tysize[tyml]; 862 int isbyte = (sz == 1); // 1 for byte operation, else 0 863 864 // See if evaluate in XMM registers 865 if (config.fpxmmregs && tyxmmreg(tyml) && op != OPnegass && !(*pretregs & mST0)) 866 { 867 xmmopass(cdb,e,pretregs); 868 return; 869 } 870 871 if (tyfloating(tyml)) 872 { 873 if (config.exe & EX_posix) 874 { 875 if (op == OPnegass) 876 cdnegass87(cdb,e,pretregs); 877 else 878 opass87(cdb,e,pretregs); 879 } 880 else 881 { 882 if (op == OPnegass) 883 opnegassdbl(cdb,e,pretregs); 884 else 885 opassdbl(cdb,e,pretregs,op); 886 } 887 return; 888 } 889 uint opsize = (I16 && tylong(tyml) && config.target_cpu >= TARGET_80386) 890 ? 
CFopsize : 0; 891 uint cflags = 0; 892 regm_t forccs = *pretregs & mPSW; // return result in flags 893 regm_t forregs = *pretregs & ~mPSW; // return result in regs 894 // true if we want the result in a register 895 uint wantres = forregs || (e1.Ecount && !OTleaf(e1.Eoper)); 896 897 reg_t reg; 898 uint op1,op2,mode; 899 code cs; 900 elem *e2; 901 regm_t varregm; 902 reg_t varreg; 903 uint jop; 904 905 906 switch (op) // select instruction opcodes 907 { 908 case OPpostinc: op = OPaddass; // i++ => += 909 goto case OPaddass; 910 911 case OPaddass: op1 = 0x01; op2 = 0x11; 912 cflags = CFpsw; 913 mode = 0; break; // ADD, ADC 914 915 case OPpostdec: op = OPminass; // i-- => -= 916 goto case OPminass; 917 918 case OPminass: op1 = 0x29; op2 = 0x19; 919 cflags = CFpsw; 920 mode = 5; break; // SUB, SBC 921 922 case OPandass: op1 = op2 = 0x21; 923 mode = 4; break; // AND, AND 924 925 case OPorass: op1 = op2 = 0x09; 926 mode = 1; break; // OR , OR 927 928 case OPxorass: op1 = op2 = 0x31; 929 mode = 6; break; // XOR, XOR 930 931 case OPnegass: op1 = 0xF7; // NEG 932 break; 933 934 default: 935 assert(0); 936 } 937 op1 ^= isbyte; // bit 0 is 0 for byte operation 938 939 if (op == OPnegass) 940 { 941 getlvalue(cdb,&cs,e1,0); 942 modEA(cdb,&cs); 943 cs.Irm |= modregrm(0,3,0); 944 cs.Iop = op1; 945 switch (_tysize[tyml]) 946 { 947 case CHARSIZE: 948 cdb.gen(&cs); 949 break; 950 951 case SHORTSIZE: 952 cdb.gen(&cs); 953 if (!I16 && *pretregs & mPSW) 954 cdb.last().Iflags |= CFopsize | CFpsw; 955 break; 956 957 case LONGSIZE: 958 if (!I16 || opsize) 959 { cdb.gen(&cs); 960 cdb.last().Iflags |= opsize; 961 break; 962 } 963 neg_2reg: 964 getlvalue_msw(&cs); 965 cdb.gen(&cs); // NEG EA+2 966 getlvalue_lsw(&cs); 967 cdb.gen(&cs); // NEG EA 968 code_orflag(cdb.last(),CFpsw); 969 cs.Iop = 0x81; 970 getlvalue_msw(&cs); 971 cs.IFL2 = FLconst; 972 cs.IEV2.Vuns = 0; 973 cdb.gen(&cs); // SBB EA+2,0 974 break; 975 976 case LLONGSIZE: 977 if (I16) 978 assert(0); // not implemented yet 979 if 
(I32) 980 goto neg_2reg; 981 cdb.gen(&cs); 982 break; 983 984 default: 985 assert(0); 986 } 987 forccs = 0; // flags already set by NEG 988 *pretregs &= ~mPSW; 989 } 990 else if ((e2 = e.EV.E2).Eoper == OPconst && // if rvalue is a const 991 el_signx32(e2) && 992 // Don't evaluate e2 in register if we can use an INC or DEC 993 (((sz <= REGSIZE || tyfv(tyml)) && 994 (op == OPaddass || op == OPminass) && 995 (el_allbits(e2, 1) || el_allbits(e2, -1)) 996 ) || 997 (!evalinregister(e2) 998 && tyml != TYhptr 999 ) 1000 ) 1001 ) 1002 { 1003 getlvalue(cdb,&cs,e1,0); 1004 modEA(cdb,&cs); 1005 cs.IFL2 = FLconst; 1006 cs.IEV2.Vsize_t = e2.EV.Vint; 1007 if (sz <= REGSIZE || tyfv(tyml) || opsize) 1008 { 1009 targ_int i = cs.IEV2.Vint; 1010 1011 // Handle shortcuts. Watch out for if result has 1012 // to be in flags. 1013 1014 if (reghasvalue(isbyte ? BYTEREGS : ALLREGS,i,®) && i != 1 && i != -1 && 1015 !opsize) 1016 { 1017 cs.Iop = op1; 1018 cs.Irm |= modregrm(0,reg & 7,0); 1019 if (I64) 1020 { if (isbyte && reg >= 4) 1021 cs.Irex |= REX; 1022 if (reg & 8) 1023 cs.Irex |= REX_R; 1024 } 1025 } 1026 else 1027 { 1028 cs.Iop = 0x81; 1029 cs.Irm |= modregrm(0,mode,0); 1030 switch (op) 1031 { 1032 case OPminass: // convert to += 1033 cs.Irm ^= modregrm(0,5,0); 1034 i = -i; 1035 cs.IEV2.Vsize_t = i; 1036 goto case OPaddass; 1037 1038 case OPaddass: 1039 if (i == 1) // INC EA 1040 goto L1; 1041 else if (i == -1) // DEC EA 1042 { cs.Irm |= modregrm(0,1,0); 1043 L1: cs.Iop = 0xFF; 1044 } 1045 break; 1046 1047 default: 1048 break; 1049 } 1050 cs.Iop ^= isbyte; // for byte operations 1051 } 1052 cs.Iflags |= opsize; 1053 if (forccs) 1054 cs.Iflags |= CFpsw; 1055 else if (!I16 && cs.Iflags & CFopsize) 1056 { 1057 switch (op) 1058 { case OPorass: 1059 case OPxorass: 1060 cs.IEV2.Vsize_t &= 0xFFFF; 1061 cs.Iflags &= ~CFopsize; // don't worry about MSW 1062 break; 1063 1064 case OPandass: 1065 cs.IEV2.Vsize_t |= ~0xFFFFL; 1066 cs.Iflags &= ~CFopsize; // don't worry about MSW 1067 break; 1068 
1069 case OPminass: 1070 case OPaddass: 1071 static if (1) 1072 { 1073 if ((cs.Irm & 0xC0) == 0xC0) // EA is register 1074 cs.Iflags &= ~CFopsize; 1075 } 1076 else 1077 { 1078 if ((cs.Irm & 0xC0) == 0xC0 && // EA is register and 1079 e1.Eoper == OPind) // not a register var 1080 cs.Iflags &= ~CFopsize; 1081 } 1082 break; 1083 1084 default: 1085 assert(0); 1086 } 1087 } 1088 1089 // For scheduling purposes, we wish to replace: 1090 // OP EA 1091 // with: 1092 // MOV reg,EA 1093 // OP reg 1094 // MOV EA,reg 1095 if (forregs && sz <= REGSIZE && (cs.Irm & 0xC0) != 0xC0 && 1096 (config.target_cpu == TARGET_Pentium || 1097 config.target_cpu == TARGET_PentiumMMX) && 1098 config.flags4 & CFG4speed) 1099 { 1100 regm_t sregm; 1101 code cs2; 1102 1103 // Determine which registers to use 1104 sregm = allregs & ~idxregm(&cs); 1105 if (isbyte) 1106 sregm &= BYTEREGS; 1107 if (sregm & forregs) 1108 sregm &= forregs; 1109 1110 allocreg(cdb,&sregm,®,tyml); // allocate register 1111 1112 cs2 = cs; 1113 cs2.Iflags &= ~CFpsw; 1114 cs2.Iop = LOD ^ isbyte; 1115 code_newreg(&cs2, reg); 1116 cdb.gen(&cs2); // MOV reg,EA 1117 1118 cs.Irm = (cs.Irm & modregrm(0,7,0)) | modregrm(3,0,reg & 7); 1119 if (reg & 8) 1120 cs.Irex |= REX_B; 1121 cdb.gen(&cs); // OP reg 1122 1123 cs2.Iop ^= 2; 1124 cdb.gen(&cs2); // MOV EA,reg 1125 1126 retregs = sregm; 1127 wantres = 0; 1128 if (e1.Ecount) 1129 cssave(e1,retregs,!OTleaf(e1.Eoper)); 1130 } 1131 else 1132 { 1133 cdb.gen(&cs); 1134 cs.Iflags &= ~opsize; 1135 cs.Iflags &= ~CFpsw; 1136 if (I16 && opsize) // if DWORD operand 1137 cs.IEV1.Voffset += 2; // compensate for wantres code 1138 } 1139 } 1140 else if (sz == 2 * REGSIZE) 1141 { 1142 targ_uns msw; 1143 1144 cs.Iop = 0x81; 1145 cs.Irm |= modregrm(0,mode,0); 1146 cs.Iflags |= cflags; 1147 cdb.gen(&cs); 1148 cs.Iflags &= ~CFpsw; 1149 1150 getlvalue_msw(&cs); // point to msw 1151 msw = cast(uint)MSREG(e.EV.E2.EV.Vllong); 1152 cs.IEV2.Vuns = msw; // msw of constant 1153 switch (op) 1154 { 1155 case 
OPminass: 1156 cs.Irm ^= modregrm(0,6,0); // SUB => SBB 1157 break; 1158 1159 case OPaddass: 1160 cs.Irm |= modregrm(0,2,0); // ADD => ADC 1161 break; 1162 1163 default: 1164 break; 1165 } 1166 cdb.gen(&cs); 1167 } 1168 else 1169 assert(0); 1170 freenode(e.EV.E2); // don't need it anymore 1171 } 1172 else if (isregvar(e1,&varregm,&varreg) && 1173 (e2.Eoper == OPvar || e2.Eoper == OPind) && 1174 !evalinregister(e2) && 1175 sz <= REGSIZE) // deal with later 1176 { 1177 getlvalue(cdb,&cs,e2,0); 1178 freenode(e2); 1179 getregs(cdb,varregm); 1180 code_newreg(&cs, varreg); 1181 if (I64 && sz == 1 && varreg >= 4) 1182 cs.Irex |= REX; 1183 cs.Iop = op1 ^ 2; // toggle direction bit 1184 if (forccs) 1185 cs.Iflags |= CFpsw; 1186 reverse = 2; // remember we toggled it 1187 cdb.gen(&cs); 1188 retregs = 0; // to trigger a bug if we attempt to use it 1189 } 1190 else if ((op == OPaddass || op == OPminass) && 1191 sz <= REGSIZE && 1192 !e2.Ecount && 1193 ((jop = jmpopcode(e2)) == JC || jop == JNC || 1194 (OTconv(e2.Eoper) && !e2.EV.E1.Ecount && ((jop = jmpopcode(e2.EV.E1)) == JC || jop == JNC))) 1195 ) 1196 { 1197 /* e1 += (x < y) ADC EA,0 1198 * e1 -= (x < y) SBB EA,0 1199 * e1 += (x >= y) SBB EA,-1 1200 * e1 -= (x >= y) ADC EA,-1 1201 */ 1202 getlvalue(cdb,&cs,e1,0); // get lvalue 1203 modEA(cdb,&cs); 1204 regm_t keepmsk = idxregm(&cs); 1205 retregs = mPSW; 1206 if (OTconv(e2.Eoper)) 1207 { 1208 scodelem(cdb,e2.EV.E1,&retregs,keepmsk,true); 1209 freenode(e2); 1210 } 1211 else 1212 scodelem(cdb,e2,&retregs,keepmsk,true); 1213 cs.Iop = 0x81 ^ isbyte; // ADC EA,imm16/32 1214 uint regop = 2; // ADC 1215 if ((op == OPaddass) ^ (jop == JC)) 1216 regop = 3; // SBB 1217 code_newreg(&cs,regop); 1218 cs.Iflags |= opsize; 1219 if (forccs) 1220 cs.Iflags |= CFpsw; 1221 cs.IFL2 = FLconst; 1222 cs.IEV2.Vsize_t = (jop == JC) ? 
0 : ~cast(targ_size_t)0; 1223 cdb.gen(&cs); 1224 retregs = 0; // to trigger a bug if we attempt to use it 1225 } 1226 else // evaluate e2 into register 1227 { 1228 retregs = (isbyte) ? BYTEREGS : ALLREGS; // pick working reg 1229 if (tyml == TYhptr) 1230 retregs &= ~mCX; // need CX for shift count 1231 scodelem(cdb,e.EV.E2,&retregs,0,true); // get rvalue 1232 getlvalue(cdb,&cs,e1,retregs); // get lvalue 1233 modEA(cdb,&cs); 1234 cs.Iop = op1; 1235 if (sz <= REGSIZE || tyfv(tyml)) 1236 { 1237 reg = findreg(retregs); 1238 code_newreg(&cs, reg); // OP1 EA,reg 1239 if (sz == 1 && reg >= 4 && I64) 1240 cs.Irex |= REX; 1241 if (forccs) 1242 cs.Iflags |= CFpsw; 1243 } 1244 else if (tyml == TYhptr) 1245 { 1246 uint mreg = findregmsw(retregs); 1247 uint lreg = findreglsw(retregs); 1248 getregs(cdb,retregs | mCX); 1249 1250 // If h -= l, convert to h += -l 1251 if (e.Eoper == OPminass) 1252 { 1253 cdb.gen2(0xF7,modregrm(3,3,mreg)); // NEG mreg 1254 cdb.gen2(0xF7,modregrm(3,3,lreg)); // NEG lreg 1255 code_orflag(cdb.last(),CFpsw); 1256 cdb.genc2(0x81,modregrm(3,3,mreg),0); // SBB mreg,0 1257 } 1258 cs.Iop = 0x01; 1259 cs.Irm |= modregrm(0,lreg,0); 1260 cdb.gen(&cs); // ADD EA,lreg 1261 code_orflag(cdb.last(),CFpsw); 1262 cdb.genc2(0x81,modregrm(3,2,mreg),0); // ADC mreg,0 1263 genshift(cdb); // MOV CX,offset __AHSHIFT 1264 cdb.gen2(0xD3,modregrm(3,4,mreg)); // SHL mreg,CL 1265 NEWREG(cs.Irm,mreg); // ADD EA+2,mreg 1266 getlvalue_msw(&cs); 1267 } 1268 else if (sz == 2 * REGSIZE) 1269 { 1270 cs.Irm |= modregrm(0,findreglsw(retregs),0); 1271 cdb.gen(&cs); // OP1 EA,reg+1 1272 code_orflag(cdb.last(),cflags); 1273 cs.Iop = op2; 1274 NEWREG(cs.Irm,findregmsw(retregs)); // OP2 EA+1,reg 1275 getlvalue_msw(&cs); 1276 } 1277 else 1278 assert(0); 1279 cdb.gen(&cs); 1280 retregs = 0; // to trigger a bug if we attempt to use it 1281 } 1282 1283 // See if we need to reload result into a register. 
1284 // Need result in registers in case we have a 32 bit 1285 // result and we want the flags as a result. 1286 if (wantres || (sz > REGSIZE && forccs)) 1287 { 1288 if (sz <= REGSIZE) 1289 { 1290 regm_t possregs; 1291 1292 possregs = ALLREGS; 1293 if (isbyte) 1294 possregs = BYTEREGS; 1295 retregs = forregs & possregs; 1296 if (!retregs) 1297 retregs = possregs; 1298 1299 // If reg field is destination 1300 if (cs.Iop & 2 && cs.Iop < 0x40 && (cs.Iop & 7) <= 5) 1301 { 1302 reg = (cs.Irm >> 3) & 7; 1303 if (cs.Irex & REX_R) 1304 reg |= 8; 1305 retregs = mask(reg); 1306 allocreg(cdb,&retregs,®,tyml); 1307 } 1308 // If lvalue is a register, just use that register 1309 else if ((cs.Irm & 0xC0) == 0xC0) 1310 { 1311 reg = cs.Irm & 7; 1312 if (cs.Irex & REX_B) 1313 reg |= 8; 1314 retregs = mask(reg); 1315 allocreg(cdb,&retregs,®,tyml); 1316 } 1317 else 1318 { 1319 allocreg(cdb,&retregs,®,tyml); 1320 cs.Iop = LOD ^ isbyte ^ reverse; 1321 code_newreg(&cs, reg); 1322 if (I64 && isbyte && reg >= 4) 1323 cs.Irex |= REX_W; 1324 cdb.gen(&cs); // MOV reg,EA 1325 } 1326 } 1327 else if (tyfv(tyml) || tyml == TYhptr) 1328 { 1329 regm_t idxregs; 1330 1331 if (tyml == TYhptr) 1332 getlvalue_lsw(&cs); 1333 idxregs = idxregm(&cs); 1334 retregs = forregs & ~idxregs; 1335 if (!(retregs & IDXREGS)) 1336 retregs |= IDXREGS & ~idxregs; 1337 if (!(retregs & mMSW)) 1338 retregs |= mMSW & ALLREGS; 1339 allocreg(cdb,&retregs,®,tyml); 1340 NEWREG(cs.Irm,findreglsw(retregs)); 1341 if (retregs & mES) // if want ES loaded 1342 { 1343 cs.Iop = 0xC4; 1344 cdb.gen(&cs); // LES lreg,EA 1345 } 1346 else 1347 { 1348 cs.Iop = LOD; 1349 cdb.gen(&cs); // MOV lreg,EA 1350 getlvalue_msw(&cs); 1351 if (I32) 1352 cs.Iflags |= CFopsize; 1353 NEWREG(cs.Irm,reg); 1354 cdb.gen(&cs); // MOV mreg,EA+2 1355 } 1356 } 1357 else if (sz == 2 * REGSIZE) 1358 { 1359 regm_t idx = idxregm(&cs); 1360 retregs = forregs; 1361 if (!retregs) 1362 retregs = ALLREGS; 1363 allocreg(cdb,&retregs,®,tyml); 1364 cs.Iop = LOD; 1365 
NEWREG(cs.Irm,reg);

            code csl = cs;
            NEWREG(csl.Irm,findreglsw(retregs));
            getlvalue_lsw(&csl);

            if (mask(reg) & idx)
            {
                cdb.gen(&csl);             // MOV reg+1,EA
                cdb.gen(&cs);              // MOV reg,EA+2
            }
            else
            {
                cdb.gen(&cs);              // MOV reg,EA+2
                cdb.gen(&csl);             // MOV reg+1,EA
            }
        }
        else
            assert(0);
        if (e1.Ecount)                 // if we gen a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
    }
    freenode(e1);
    if (sz <= REGSIZE)
        *pretregs &= ~mPSW;            // flags are already set
    fixresult(cdb,e,retregs,pretregs);
}

/********************************
 * Generate code for *=
 *
 * Dispatch, in order:
 *   - XMM-typed lvalue (and ST0 not requested): handed to xmmopass()
 *   - other floating lvalue: opass87() when config.exe has EX_posix set,
 *     otherwise opassdbl()
 *   - integer, sz <= REGSIZE: LEA sequences for a fixed set of constant
 *     factors, else IMUL reg,EA,imm / IMUL reg,EA / [I]MUL EA through AX
 *   - integer, sz == 2 * REGSIZE: inline multiply for a constant factor on
 *     I32, inline IMUL/MUL sequence on PentiumPro and later, else a call to
 *     CLIB.lmul
 * Any other size asserts.
 *
 * Params:
 *      cdb = code is appended here
 *      e = the *= node; e.EV.E1 is the lvalue, e.EV.E2 the multiplier
 *      pretregs = requested result registers/flags; forwarded to the
 *                 helper that stores the result back
 */

void cdmulass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    regm_t retregs;
    reg_t resreg;
    uint opr,isbyte;

    //printf("cdmulass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;                     // OPxxxx

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    char uns = tyuns(tyml) || tyuns(e2.Ety);
    uint sz = _tysize[tyml];

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;          // 64 bit operands

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    if (tyfloating(tyml))
    {
        if (config.exe & EX_posix)
        {
            opass87(cdb,e,pretregs);
        }
        else
        {
            opassdbl(cdb,e,pretregs,op);
        }
        return;
    }

    if (sz <= REGSIZE)                  // if word or byte
    {
        if (e2.Eoper == OPconst &&
            (I32 || I64) &&
            el_signx32(e2) &&
            sz >= 4)
        {
            // See if we can use an LEA instruction

            int ss;
            int ss2 = 0;
            int shift;

            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            switch (e2factor)
            {
                case 12:    ss = 1; ss2 = 2; goto L4;
                case 24:    ss = 1; ss2 = 3; goto L4;

                case 6:
                case 3:     ss = 1; goto L4;

                case 20:    ss = 2; ss2 = 2; goto L4;
                case 40:    ss = 2; ss2 = 3; goto L4;

                case 10:
                case 5:     ss = 2; goto L4;

                case 36:    ss = 3; ss2 = 2; goto L4;
                case 72:    ss = 3; ss2 = 3; goto L4;

                case 18:
                case 9:     ss = 3; goto L4;
                L4:
                {
                    getlvalue(cdb,&cs,e1,0);           // get EA
                    modEA(cdb,&cs);
                    freenode(e2);
                    regm_t idxregs = idxregm(&cs);
                    regm_t regm = *pretregs & ~(idxregs | mBP | mR13);    // don't use EBP
                    if (!regm)
                        regm = allregs & ~(idxregs | mBP | mR13);
                    reg_t reg;
                    allocreg(cdb,&regm,&reg,tyml);
                    cs.Iop = LOD;
                    code_newreg(&cs,reg);
                    cs.Irex |= rex;
                    cdb.gen(&cs);                      // MOV reg,EA

                    assert((reg & 7) != BP);
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                modregxrmx(ss,reg,reg));  // LEA reg,[ss*reg][reg]
                    if (ss2)
                    {
                        cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                       modregxrm(ss2,reg,5));
                        cdb.last().IFL1 = FLconst;
                        cdb.last().IEV1.Vint = 0;      // LEA reg,0[ss2*reg]
                    }
                    else if (!(e2factor & 1))          // if even factor
                    {
                        genregs(cdb,0x03,reg,reg);     // ADD reg,reg
                        code_orrex(cdb.last(),rex);
                    }
                    opAssStoreReg(cdb,cs,e,reg,pretregs);
                    return;
                }

                case 37:
                case 74:    shift = 2;
                            goto L5;
                case 13:
                case 26:    shift = 0;
                            goto L5;
                L5:
                {
                    getlvalue(cdb,&cs,e1,0);           // get EA
                    modEA(cdb,&cs);
                    freenode(e2);
                    regm_t idxregs = idxregm(&cs);
                    regm_t regm = *pretregs & ~(idxregs | mBP | mR13);    // don't use EBP
                    if (!regm)
                        regm = allregs & ~(idxregs | mBP | mR13);
                    reg_t reg;                         // return register
                    allocreg(cdb,&regm,&reg,tyml);

                    // sreg holds the loaded value; it must differ from reg and the index regs
                    reg_t sreg = allocScratchReg(cdb, allregs & ~(regm | idxregs | mBP | mR13));

                    cs.Iop = LOD;
                    code_newreg(&cs,sreg);
                    cs.Irex |= rex;
                    cdb.gen(&cs);                                         // MOV sreg,EA

                    assert((sreg & 7) != BP);
                    assert((reg & 7) != BP);
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                          modregxrmx(2,sreg,sreg));       // LEA reg,[sreg*4][sreg]
                    if (shift)
                        cdb.genc2(0xC1,grex | modregrmx(3,4,sreg),shift); // SHL sreg,shift
                    cdb.gen2sib(LEA,grex | modregxrm(0,reg,4),
                                          modregxrmx(3,sreg,reg));        // LEA reg,[sreg*8][reg]
                    if (!(e2factor & 1))                                  // if even factor
                    {
                        genregs(cdb,0x03,reg,reg);                        // ADD reg,reg
                        code_orrex(cdb.last(),rex);
                    }
                    opAssStoreReg(cdb,cs,e,reg,pretregs);
                    return;
                }

                default:
                    break;
            }
        }

        isbyte = (sz == 1);             // 1 for byte operation

        if (config.target_cpu >= TARGET_80286 &&
            e2.Eoper == OPconst && !isbyte)
        {
            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            // A 64 bit factor that does not fit in 32 bits cannot be an immediate
            if (I64 && sz == 8 && e2factor != cast(int)e2factor)
                goto L1;
            freenode(e2);
            getlvalue(cdb,&cs,e1,0);     // get EA
            regm_t idxregs = idxregm(&cs);
            retregs = *pretregs & (ALLREGS | mBP) & ~idxregs;
            if (!retregs)
                retregs = ALLREGS & ~idxregs;
            allocreg(cdb,&retregs,&resreg,tyml);
            cs.Iop = 0x69;              // IMUL reg,EA,e2value
            cs.IFL2 = FLconst;
            cs.IEV2.Vint = cast(int)e2factor;
            opr = resreg;
        }
        else if (!I16 && !isbyte)
        {
         L1:
            retregs = *pretregs & (ALLREGS | mBP);
            if (!retregs)
                retregs = ALLREGS;
            codelem(cdb,e2,&retregs,false); // load rvalue in reg
            getlvalue(cdb,&cs,e1,retregs);  // get EA
            getregs(cdb,retregs);           // destroy these regs
            cs.Iop = 0x0FAF;                // IMUL resreg,EA
            resreg = findreg(retregs);
            opr = resreg;
        }
        else
        {
            retregs = mAX;
            codelem(cdb,e2,&retregs,false);         // load rvalue in AX
            getlvalue(cdb,&cs,e1,mAX);              // get EA
            getregs(cdb,isbyte ? mAX : mAX | mDX);  // destroy these regs
            cs.Iop = 0xF7 ^ isbyte;                 // [I]MUL EA
            opr = uns ? 4 : 5;                      // MUL/IMUL
            resreg = AX;                            // result register for *
        }
        code_newreg(&cs,opr);
        cdb.gen(&cs);

        opAssStoreReg(cdb, cs, e, resreg, pretregs);
        return;
    }
    else if (sz == 2 * REGSIZE)
    {
        if (e2.Eoper == OPconst && I32)
        {
            /*  if (msw)
                  IMUL    EDX,EDX,lsw
                  IMUL    reg,EAX,msw
                  ADD     reg,EDX
                else
                  IMUL    reg,EDX,lsw
                MOV       EDX,lsw
                MUL       EDX
                ADD       EDX,reg
             */
            // NOTE(review): e2 is still read by el_tolong() below, after this
            // freenode(e2); presumably freenode() leaves the node's value
            // readable - confirm.
            freenode(e2);
            retregs = mDX|mAX;
            reg_t rhi, rlo;
            opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
            const regm_t keepmsk = idxregm(&cs);

            reg_t reg = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            targ_size_t e2factor = cast(targ_size_t)el_tolong(e2);
            const lsw = cast(targ_int)(e2factor & ((1L << (REGSIZE * 8)) - 1));
            const msw = cast(targ_int)(e2factor >> (REGSIZE * 8));

            if (msw)
            {
                genmulimm(cdb,DX,DX,lsw);           // IMUL EDX,EDX,lsw
                genmulimm(cdb,reg,AX,msw);          // IMUL reg,EAX,msw
                cdb.gen2(0x03,modregrm(3,reg,DX));  // ADD  reg,EDX
            }
            else
                genmulimm(cdb,reg,DX,lsw);          // IMUL reg,EDX,lsw

            movregconst(cdb,DX,lsw,0);              // MOV EDX,lsw
            getregs(cdb,mDX);
            cdb.gen2(0xF7,modregrm(3,4,DX));        // MUL EDX
            cdb.gen2(0x03,modregrm(3,DX,reg));      // ADD EDX,reg
        }
        else
        {
            retregs = mDX | mAX;
            regm_t rretregs = (config.target_cpu >= TARGET_PentiumPro) ? allregs & ~retregs : mCX | mBX;
            codelem(cdb,e2,&rretregs,false);
            getlvalue(cdb,&cs,e1,retregs | rretregs);
            getregs(cdb,retregs);
            cs.Iop = LOD;
            cdb.gen(&cs);                   // MOV AX,EA
            getlvalue_msw(&cs);
            cs.Irm |= modregrm(0,DX,0);
            cdb.gen(&cs);                   // MOV DX,EA+2
            getlvalue_lsw(&cs);
            if (config.target_cpu >= TARGET_PentiumPro)
            {
                regm_t rlo = findreglsw(rretregs);
                regm_t rhi = findregmsw(rretregs);
                /*  IMUL    rhi,EAX
                    IMUL    EDX,rlo
                    ADD     rhi,EDX
                    MUL     rlo
                    ADD     EDX,rhi
                 */
                getregs(cdb,mAX|mDX|mask(rhi));
                cdb.gen2(0x0FAF,modregrm(3,rhi,AX));
                cdb.gen2(0x0FAF,modregrm(3,DX,rlo));
                cdb.gen2(0x03,modregrm(3,rhi,DX));
                cdb.gen2(0xF7,modregrm(3,4,rlo));
                cdb.gen2(0x03,modregrm(3,DX,rhi));
            }
            else
            {
                callclib(cdb,e,CLIB.lmul,&retregs,idxregm(&cs));
            }
        }

        opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
        return;
    }
    else
    {
        assert(0);
    }
}


/********************************
 * Generate code for /= %=
 *
 * Dispatch, in order:
 *   - XMM-typed lvalue, /= only (and ST0 not requested): xmmopass()
 *   - other floating lvalue: opass87() when config.exe has EX_posix set,
 *     otherwise opassdbl()
 *   - integer, sz <= REGSIZE (byte operands are asserted not to occur):
 *     when optimizing for speed, multiply-by-reciprocal sequences for
 *     signed and unsigned constant divisors and shift sequences for signed
 *     power-of-2 divisors; otherwise DIV/IDIV with the dividend in DX:AX
 *   - integer, sz == 2 * REGSIZE (asserted): call CLIB.ldiv / CLIB.uldiv,
 *     or the library entry that follows it for %=
 *
 * Params:
 *      cdb = code is appended here
 *      e = the /= or %= node; e.EV.E1 is the lvalue, e.EV.E2 the divisor
 *      pretregs = requested result registers/flags; forwarded to the
 *                 helper that stores the result back
 */

void cddivass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    OPER op = e.Eoper;                     // OPxxxx

    // See if evaluate in XMM registers
    if (config.fpxmmregs && tyxmmreg(tyml) && op != OPmodass && !(*pretregs & mST0))
    {
        xmmopass(cdb,e,pretregs);
        return;
    }

    if (tyfloating(tyml))
    {
        if (config.exe & EX_posix)
        {
            opass87(cdb,e,pretregs);
        }
        else
        {
            opassdbl(cdb,e,pretregs,op);
        }
        return;
    }

    code cs = void;

    //printf("cddivass(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    char uns = tyuns(tyml) || tyuns(e2.Ety);
    uint sz = _tysize[tyml];

    uint rex = (I64 && sz == 8) ? REX_W : 0;
    uint grex = rex << 16;          // 64 bit operands

    if (sz <= REGSIZE)                  // if word or byte
    {
        uint isbyte = (sz == 1);        // 1 for byte operation
        reg_t resreg;
        targ_size_t e2factor;
        targ_size_t d;                  // |e2factor| for signed divides
        bool neg;                       // true if signed divisor was negative
        int pow2;

        assert(!isbyte);                      // should never happen
        assert(I16 || sz != SHORTSIZE);

        if (e2.Eoper == OPconst)
        {
            e2factor = cast(targ_size_t)el_tolong(e2);
            pow2 = ispow2(e2factor);
            d = e2factor;
            if (!uns && cast(targ_llong)e2factor < 0)
            {
                neg = true;
                d = -d;
            }
        }

        // Signed divide by a constant
        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst &&
            !uns &&
            (d & (d - 1)) &&
            ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
        {
            /* R1 / 10
             *
             *  MOV     EAX,m
             *  IMUL    R1
             *  MOV     EAX,R1
             *  SAR     EAX,31
             *  SAR     EDX,shpost
             *  SUB     EDX,EAX
             *  IMUL    EAX,EDX,d
             *  SUB     R1,EAX
             *
             * EDX = quotient
             * R1 = remainder
             */
            assert(sz == 4 || sz == 8);

            ulong m;
            int shpost;
            const int N = sz * 8;
            const bool mhighbit = choose_multiplier(N, d, N - 1, &m, &shpost);

            freenode(e2);

            getlvalue(cdb,&cs,e1,mAX | mDX);
            reg_t reg;
            opAssLoadReg(cdb, cs, e, reg, allregs & ~( mAX | mDX | idxregm(&cs)));    // MOV reg,EA
            getregs(cdb, mAX|mDX);

            /* Algorithm 5.2
             * if m>=2**(N-1)
             *    q = SRA(n + MULSH(m-2**N,n), shpost) - XSIGN(n)
             * else
             *    q = SRA(MULSH(m,n), shpost) - XSIGN(n)
             * if (neg)
             *    q = -q
             */
            const bool mgt = mhighbit || m >= (1UL << (N - 1));
            movregconst(cdb, AX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EAX,m
            cdb.gen2(0xF7,grex | modregrmx(3,5,reg));               // IMUL reg
            if (mgt)
                cdb.gen2(0x03,grex | modregrmx(3,DX,reg));          // ADD EDX,reg
            getregsNoSave(mAX);                                     // EAX no longer contains 'm'
            genmovreg(cdb, AX, reg);                                // MOV EAX,reg
            cdb.genc2(0xC1,grex | modregrm(3,7,AX),sz * 8 - 1);     // SAR EAX,31
            if (shpost)
                cdb.genc2(0xC1,grex | modregrm(3,7,DX),shpost);     // SAR EDX,shpost
            reg_t r3;
            if (neg && op == OPdivass)
            {
                cdb.gen2(0x2B,grex | modregrm(3,AX,DX));            // SUB EAX,EDX
                r3 = AX;
            }
            else
            {
                cdb.gen2(0x2B,grex | modregrm(3,DX,AX));            // SUB EDX,EAX
                r3 = DX;
            }

            // r3 is quotient
            reg_t resregx;
            switch (op)
            {   case OPdivass:
                    resregx = r3;
                    break;

                case OPmodass:
                    assert(reg != AX && r3 == DX);
                    if (sz == 4 || (sz == 8 && cast(targ_long)d == d))
                    {
                        cdb.genc2(0x69,grex | modregrm(3,AX,DX),d);      // IMUL EAX,EDX,d
                    }
                    else
                    {
                        movregconst(cdb,AX,d,(sz == 8) ? 0x40 : 0);     // MOV EAX,d
                        cdb.gen2(0x0FAF,grex | modregrmx(3,AX,DX));     // IMUL EAX,EDX
                        getregsNoSave(mAX);                             // EAX no longer contains 'd'
                    }
                    cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));          // SUB R1,EAX
                    resregx = reg;
                    break;

                default:
                    assert(0);
            }

            opAssStoreReg(cdb, cs, e, resregx, pretregs);
            return;
        }

        // Unsigned divide by a constant
        void unsignedDivideByConstant(ref CodeBuilder cdb)
        {
            assert(sz == 4 || sz == 8);

            reg_t r3;
            reg_t reg;
            ulong m;
            int shpre;
            int shpost;
            code cs = void;

            if (udiv_coefficients(sz * 8, e2factor, &shpre, &m, &shpost))
            {
                /* t1 = MULUH(m, n)
                 * q = SRL(t1 + SRL(n - t1, 1), shpost - 1)
                 *   MOV   EAX,reg
                 *   MOV   EDX,m
                 *   MUL   EDX
                 *   MOV   EAX,reg
                 *   SUB   EAX,EDX
                 *   SHR   EAX,1
                 *   LEA   R3,[EAX][EDX]
                 *   SHR   R3,shpost-1
                 */
                assert(shpre == 0);

                freenode(e2);
                getlvalue(cdb,&cs,e1,mAX | mDX);
                regm_t idxregs = idxregm(&cs);
                opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs));    // MOV reg,EA
                getregs(cdb, mAX|mDX);

                genmovreg(cdb,AX,reg);                                // MOV EAX,reg
                movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
                getregs(cdb,mask(reg) | mDX | mAX);
                cdb.gen2(0xF7,grex | modregrmx(3,4,DX));              // MUL EDX
                genmovreg(cdb,AX,reg);                                // MOV EAX,reg
                cdb.gen2(0x2B,grex | modregrm(3,AX,DX));              // SUB EAX,EDX
                cdb.genc2(0xC1,grex | modregrm(3,5,AX),1);            // SHR EAX,1
                regm_t regm3 = allregs & ~idxregs;
                if (op == OPmodass)
                {
                    // %= still needs the original value in reg, and EAX when
                    // the divisor has to be loaded into it below
                    regm3 &= ~mask(reg);
                    if (!el_signx32(e2))
                        regm3 &= ~mAX;
                }
                allocreg(cdb,&regm3,&r3,TYint);
                cdb.gen2sib(LEA,grex | modregxrm(0,r3,4),modregrm(0,AX,DX)); // LEA R3,[EAX][EDX]
                if (shpost != 1)
                    cdb.genc2(0xC1,grex | modregrmx(3,5,r3),shpost-1);   // SHR R3,shpost-1
            }
            else
            {
                /* q = SRL(MULUH(m, SRL(n, shpre)), shpost)
                 *   SHR   EAX,shpre
                 *   MOV   reg,m
                 *   MUL   reg
                 *   SHR   EDX,shpost
                 */

                freenode(e2);
                getlvalue(cdb,&cs,e1,mAX | mDX);
                regm_t idxregs = idxregm(&cs);
                opAssLoadReg(cdb, cs, e, reg, allregs & ~(mAX|mDX | idxregs));    // MOV reg,EA
                getregs(cdb, mAX|mDX);

                if (reg != AX)
                {
                    getregs(cdb,mAX);
                    genmovreg(cdb,AX,reg);                              // MOV EAX,reg
                }
                if (shpre)
                {
                    getregs(cdb,mAX);
                    cdb.genc2(0xC1,grex | modregrm(3,5,AX),shpre);      // SHR EAX,shpre
                }
                getregs(cdb,mDX);
                movregconst(cdb, DX, cast(targ_size_t)m, (sz == 8) ? 0x40 : 0);  // MOV EDX,m
                getregs(cdb,mDX | mAX);
                cdb.gen2(0xF7,grex | modregrmx(3,4,DX));                // MUL EDX
                if (shpost)
                    cdb.genc2(0xC1,grex | modregrm(3,5,DX),shpost);     // SHR EDX,shpost
                r3 = DX;
            }

            reg_t resregx;
            switch (op)
            {
                case OPdivass:
                    // r3 = quotient
                    resregx = r3;
                    break;

                case OPmodass:
                    /* reg = original value
                     * r3  = quotient
                     */
                    assert(reg != AX);
                    if (el_signx32(e2))
                    {
                        cdb.genc2(0x69,grex | modregrmx(3,AX,r3),e2factor); // IMUL EAX,r3,e2factor
                    }
                    else
                    {
                        assert(!(mask(r3) & mAX));
                        movregconst(cdb,AX,e2factor,(sz == 8) ? 0x40 : 0);  // MOV EAX,e2factor
                        getregs(cdb,mAX);
                        cdb.gen2(0x0FAF,grex | modregrmx(3,AX,r3));   // IMUL EAX,r3
                    }
                    getregs(cdb,mask(reg));
                    cdb.gen2(0x2B,grex | modregxrm(3,reg,AX));        // SUB reg,EAX
                    resregx = reg;
                    break;

                default:
                    assert(0);
            }

            opAssStoreReg(cdb, cs, e, resregx, pretregs);
            return;
        }

        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst &&
            uns &&
            e2factor > 2 && (e2factor & (e2factor - 1)) &&
            ((I32 && sz == 4) || (I64 && (sz == 4 || sz == 8))))
        {
            unsignedDivideByConstant(cdb);
            return;
        }

        if (config.flags4 & CFG4speed &&
            e2.Eoper == OPconst && !uns &&
            (sz == REGSIZE || (I64 && sz == 4)) &&
            pow2 != -1 &&
            e2factor == cast(int)e2factor &&
            !(config.target_cpu < TARGET_80286 && pow2 != 1 && op == OPdivass)
           )
        {
            freenode(e2);
            if (pow2 == 1 && op == OPdivass && config.target_cpu > TARGET_80386)
            {
                /* This is better than the code further down because it is
                 * not constrained to using AX and DX.
                 */
                getlvalue(cdb,&cs,e1,0);
                regm_t idxregs = idxregm(&cs);
                reg_t reg;
                opAssLoadReg(cdb,cs,e,reg,allregs & ~idxregs); // MOV reg,EA

                reg_t r = allocScratchReg(cdb, allregs & ~(idxregs | mask(reg)));
                genmovreg(cdb,r,reg);                        // MOV r,reg
                cdb.genc2(0xC1,grex | modregxrmx(3,5,r),(sz * 8 - 1)); // SHR r,31
                cdb.gen2(0x03,grex | modregxrmx(3,reg,r));   // ADD reg,r
                cdb.gen2(0xD1,grex | modregrmx(3,7,reg));    // SAR reg,1

                opAssStoreReg(cdb, cs, e, reg, pretregs);
                return;
            }

            // Signed divide or modulo by power of 2
            getlvalue(cdb,&cs,e1,mAX | mDX);
            reg_t reg;
            opAssLoadReg(cdb,cs,e,reg,mAX);

            getregs(cdb,mDX);                   // DX is scratch register
            cdb.gen1(0x99);                     // CWD
            code_orrex(cdb.last(), rex);
            if (pow2 == 1)
            {
                if (op == OPdivass)
                {
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
                    cdb.gen2(0xD1,grex | modregrm(3,7,AX));        // SAR AX,1
                    resreg = AX;
                }
                else // OPmod
                {
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
                    cdb.genc2(0x81,grex | modregrm(3,4,AX),1);     // AND AX,1
                    cdb.gen2(0x03,grex | modregrm(3,DX,AX));       // ADD DX,AX
                    resreg = DX;
                }
            }
            else
            {
                assert(pow2 < 32);
                targ_ulong m = (1 << pow2) - 1;
                if (op == OPdivass)
                {
                    cdb.genc2(0x81,grex | modregrm(3,4,DX),m);     // AND DX,m
                    cdb.gen2(0x03,grex | modregrm(3,AX,DX));       // ADD AX,DX
                    // Be careful not to generate this for 8088
                    assert(config.target_cpu >= TARGET_80286);
                    cdb.genc2(0xC1,grex | modregrm(3,7,AX),pow2);  // SAR AX,pow2
                    resreg = AX;
                }
                else // OPmodass
                {
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
                    cdb.genc2(0x81,grex | modregrm(3,4,AX),m);     // AND AX,m
                    cdb.gen2(0x33,grex | modregrm(3,AX,DX));       // XOR AX,DX
                    cdb.gen2(0x2B,grex | modregrm(3,AX,DX));       // SUB AX,DX
                    resreg = AX;
                }
            }
        }
        else
        {
            regm_t retregs = ALLREGS & ~(mAX|mDX);     // DX gets sign extension
            codelem(cdb,e2,&retregs,false);            // load rvalue in retregs
            reg_t reg = findreg(retregs);
            getlvalue(cdb,&cs,e1,mAX | mDX | retregs); // get EA
            getregs(cdb,mAX | mDX);         // destroy these regs
            cs.Irm |= modregrm(0,AX,0);
            cs.Iop = LOD;
            cdb.gen(&cs);                   // MOV AX,EA
            if (uns)                        // if uint
                movregconst(cdb,DX,0,0);    // CLR DX
            else                            // else signed
            {
                cdb.gen1(0x99);             // CWD
                code_orrex(cdb.last(),rex);
            }
            getregs(cdb,mDX | mAX);         // DX and AX will be destroyed
            const uint opr = uns ? 6 : 7;   // DIV/IDIV
            genregs(cdb,0xF7,opr,reg);      // OPR reg
            code_orrex(cdb.last(),rex);
            resreg = (op == OPmodass) ? DX : AX;        // result register
        }
        opAssStoreReg(cdb, cs, e, resreg, pretregs);
        return;
    }

    assert(sz == 2 * REGSIZE);

    targ_size_t e2factor;
    int pow2;
    if (e2.Eoper == OPconst)
    {
        e2factor = cast(targ_size_t)el_tolong(e2);
        pow2 = ispow2(e2factor);
    }

    /* NOTE(review): `e` is the /= or %= node itself (op == e.Eoper), so the
     * `e.Eoper == OPconst` term in the two conditions below looks like it can
     * never be true, which would make both register-pair fast paths
     * unreachable; `e2.Eoper` was presumably intended. Deliberately left as
     * is: the guarded code has open questions of its own (see the notes
     * inside) and should be verified before it is enabled.
     */

    // Register pair signed divide by power of 2
    if (op == OPdivass &&
        !uns &&
        e.Eoper == OPconst &&
        pow2 != -1 &&
        I32 // not set up for I16 or I64 cent
       )
    {
        freenode(e2);
        regm_t retregs = mDX|mAX | mCX|mBX;     // LSW must be byte reg because of later SETZ
        reg_t rhi, rlo;
        opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
        const regm_t keepmsk = idxregm(&cs);
        retregs = mask(rhi) | mask(rlo);

        if (pow2 < 32)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            genmovreg(cdb,r1,rhi);                                        // MOV  r1,rhi
            if (pow2 == 1)
                cdb.genc2(0xC1,grex | modregrmx(3,5,r1),REGSIZE * 8 - 1); // SHR  r1,31
            else
            {
                cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR  r1,31
                cdb.genc2(0x81,grex | modregrmx(3,4,r1),(1 << pow2) - 1); // AND  r1,mask
            }
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD  rlo,r1
            cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC  rhi,0
            cdb.genc2(0x0FAC,grex | modregrm(3,rhi,rlo),pow2);            // SHRD rlo,rhi,pow2
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),pow2);               // SAR  rhi,pow2
        }
        else if (pow2 == 32)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));

            genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                   // ADD rlo,r1
            cdb.genc2(0x81,grex | modregxrmx(3,2,rhi),0);                 // ADC rhi,0
            cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
        }
        else if (pow2 < 63)
        {
            reg_t r1 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk));
            reg_t r2 = allocScratchReg(cdb, allregs & ~(retregs | keepmsk | mask(r1)));

            genmovreg(cdb,r1,rhi);                                        // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);     // SAR r1,31
            cdb.genmovreg(r2,r1);                                         // MOV r2,r1

            if (pow2 == 33)
            {
                cdb.gen2(0xF7,modregrmx(3,3,r1));                         // NEG r1
                cdb.gen2(0x03,grex | modregxrmx(3,rlo,r2));               // ADD rlo,r2
                cdb.gen2(0x13,grex | modregxrmx(3,rhi,r1));               // ADC rhi,r1
            }
            else
            {
                cdb.genc2(0x81,grex | modregrmx(3,4,r2),(1 << (pow2-32)) - 1); // AND r2,mask
                cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                    // ADD rlo,r1
                cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                    // ADC rhi,r2
            }

            cdb.genmovreg(rlo,rhi);                                       // MOV rlo,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,rlo),pow2 - 32);          // SAR rlo,pow2-32
            cdb.genc2(0xC1,grex | modregrmx(3,7,rhi),REGSIZE * 8 - 1);    // SAR rhi,31
        }
        else
        {
            // This may be better done by cgelem.d
            assert(pow2 == 63);
            assert(mask(rlo) & BYTEREGS);                          // for SETZ
            // NOTE(review): the comment says ADD, but 0x81 with reg field 4 is
            // commented as AND everywhere else in this function - confirm
            // which is intended.
            cdb.genc2(0x81,grex | modregrmx(3,4,rhi),0x8000_0000); // ADD rhi,0x8000_000
            cdb.genregs(0x09,rlo,rhi);                             // OR  rlo,rhi
            cdb.gen2(0x0F94,modregrmx(3,0,rlo));                   // SETZ rlo
            cdb.genregs(MOVZXb,rlo,rlo);                           // MOVZX rlo,rloL
            movregconst(cdb,rhi,0,0);                              // MOV rhi,0
        }

        // NOTE(review): the other opAssStorePair() callers in this file pass
        // (msw, lsw) here; this call passes (rlo, rhi) - confirm the order.
        opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
        return;
    }

    // Register pair signed modulo by power of 2
    if (op == OPmodass &&
        !uns &&
        e.Eoper == OPconst &&
        pow2 != -1 &&
        I32 // not set up for I64 cent yet
       )
    {
        freenode(e2);
        regm_t retregs = mDX|mAX;
        reg_t rhi, rlo;
        opAssLoadPair(cdb, cs, e, rhi, rlo, retregs, 0);
        const regm_t keepmsk = idxregm(&cs);

        regm_t scratchm = allregs & ~(retregs | keepmsk);
        if (pow2 == 63)
            scratchm &= BYTEREGS;               // because of SETZ
        reg_t r1 = allocScratchReg(cdb, scratchm);

        if (pow2 < 32)
        {
            cdb.genmovreg(r1,rhi);                                    // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1); // SAR r1,31
            cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
            cdb.genc2(0x81,grex | modregrmx(3,4,rlo),(1<<pow2)-1);    // AND rlo,(1<<pow2)-1
            cdb.gen2(0x33,grex | modregxrmx(3,rlo,r1));               // XOR rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));               // SUB rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));              // SBB rhi,rhi
        }
        else if (pow2 == 32)
        {
            cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,rhi));                // SBB rhi,rhi
        }
        else if (pow2 < 63)
        {
            scratchm = allregs & ~(retregs | scratchm);
            reg_t r2;
            allocreg(cdb,&scratchm,&r2,TYint);

            cdb.genmovreg(r1,rhi);                                      // MOV r1,rhi
            cdb.genc2(0xC1,grex | modregrmx(3,7,r1),REGSIZE * 8 - 1);   // SAR r1,31
            cdb.genmovreg(r2,r1);                                       // MOV r2,r1
            cdb.genc2(0x0FAC,grex | modregrm(3,r2,r1),64-pow2);         // SHRD r1,r2,64-pow2
            cdb.genc2(0xC1,grex | modregrmx(3,5,r2),64-pow2);           // SHR r2,64-pow2
            cdb.gen2(0x03,grex | modregxrmx(3,rlo,r1));                 // ADD rlo,r1
            cdb.gen2(0x13,grex | modregxrmx(3,rhi,r2));                 // ADC rhi,r2
            cdb.genc2(0x81,grex | modregrmx(3,4,rhi),(1<<(pow2-32))-1); // AND rhi,(1<<(pow2-32))-1
            cdb.gen2(0x2B,grex | modregxrmx(3,rlo,r1));                 // SUB rlo,r1
            cdb.gen2(0x1B,grex | modregxrmx(3,rhi,r2));                 // SBB rhi,r2
        }
        else
        {
            // This may be better done by cgelem.d
            assert(pow2 == 63);

            cdb.genc1(LEA,grex | modregxrmx(2,r1,rhi), FLconst, 0x8000_0000); // LEA r1,0x8000_0000[rhi]
            cdb.gen2(0x0B,grex | modregxrmx(3,r1,rlo));               // OR   r1,rlo
            cdb.gen2(0x0F94,modregrmx(3,0,r1));                       // SETZ r1
            cdb.genc2(0xC1,grex | modregrmx(3,4,r1),REGSIZE * 8 - 1); // SHL  r1,31
            cdb.gen2(0x2B,grex | modregxrmx(3,rhi,r1));               // SUB  rhi,r1
        }

        // NOTE(review): argument order (rlo, rhi) differs from the other
        // opAssStorePair() callers in this file - confirm.
        opAssStorePair(cdb, cs, e, rlo, rhi, pretregs);
        return;
    }

    regm_t rretregs = mCX|mBX;
    codelem(cdb,e2,&rretregs,false);    // load e2 into CX|BX

    reg_t rlo;
    reg_t rhi;
    opAssLoadPair(cdb, cs, e, rhi, rlo, mDX|mAX, rretregs);

    regm_t retregs = (op == OPmodass) ? mCX|mBX : mDX|mAX;
    uint lib = uns ? CLIB.uldiv : CLIB.ldiv;
    if (op == OPmodass)
        ++lib;                          // select the CLIB entry following the divide one
    callclib(cdb,e,lib,&retregs,idxregm(&cs));

    opAssStorePair(cdb, cs, e, findregmsw(retregs), findreglsw(retregs), pretregs);
}


/********************************
 * Generate code for <<= and >>=
 *
 * Handles OPshlass, OPshrass and OPashrass; any other operator asserts.
 * The shift count e2 must be no larger than REGSIZE (asserted).
 * When no register result is needed, or the operand is larger than a
 * register, the shift is done directly on the lvalue's EA (the larger
 * operands one bit at a time through the carry flag); otherwise the lvalue
 * is loaded into a register, shifted there and stored back.
 *
 * Params:
 *      cdb = code is appended here
 *      e = the shift-assign node; e.EV.E1 is the lvalue, e.EV.E2 the count
 *      pretregs = requested result registers/flags; on the in-memory path
 *                 it is overwritten with the registers actually holding
 *                 the result (possibly 0)
 */

void cdshass(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    code cs;
    regm_t retregs;
    uint op1,op2;
    reg_t reg;

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;

    tym_t tyml = tybasic(e1.Ety);              // type of lvalue
    uint sz = _tysize[tyml];
    uint isbyte = tybyte(e.Ety) != 0;          // 1 for byte operations
    tym_t tym = tybasic(e.Ety);                // type of result
    OPER oper = e.Eoper;
    assert(tysize(e2.Ety) <= REGSIZE);

    uint rex = (I64 && sz == 8) ? REX_W : 0;

    // if our lvalue is a cse, make sure we evaluate for result in register
    if (e1.Ecount && !(*pretregs & (ALLREGS | mBP)) && !isregvar(e1,&retregs,&reg))
        *pretregs |= ALLREGS;

    version (SCPP)
    {
        // Do this until the rest of the compiler does OPshr/OPashr correctly
        if (oper == OPshrass)
            oper = tyuns(tyml) ? OPshrass : OPashrass;
    }

    // Select opcodes. op2 is used for msw for long shifts.

    switch (oper)
    {
        case OPshlass:
            op1 = 4;                    // SHL
            op2 = 2;                    // RCL
            break;

        case OPshrass:
            op1 = 5;                    // SHR
            op2 = 3;                    // RCR
            break;

        case OPashrass:
            op1 = 7;                    // SAR
            op2 = 3;                    // RCR
            break;

        default:
            assert(0);
    }


    uint v = 0xD3;                      // for SHIFT xx,CL cases
    uint loopcnt = 1;
    uint conste2 = false;
    uint shiftcnt = 0;                  // avoid "use before initialized" warnings
    if (e2.Eoper == OPconst)
    {
        conste2 = true;                 // e2 is a constant
        shiftcnt = e2.EV.Vint;          // byte ordering of host
        if (config.target_cpu >= TARGET_80286 &&
            sz <= REGSIZE &&
            shiftcnt != 1)
            v = 0xC1;                   // SHIFT xx,shiftcnt
        else if (shiftcnt <= 3)
        {
            loopcnt = shiftcnt;
            v = 0xD1;                   // SHIFT xx,1
        }
    }

    if (v == 0xD3)                      // if COUNT == CL
    {
        retregs = mCX;
        codelem(cdb,e2,&retregs,false);
    }
    else
        freenode(e2);
    getlvalue(cdb,&cs,e1,mCX);          // get lvalue, preserve CX
    modEA(cdb,&cs);                     // check for modifying register

    if (*pretregs == 0 ||               // if don't return result
        (*pretregs == mPSW && conste2 && _tysize[tym] <= REGSIZE) ||
        sz > REGSIZE
       )
    {
        retregs = 0;            // value not returned in a register
        cs.Iop = v ^ isbyte;
        while (loopcnt--)
        {
            NEWREG(cs.Irm,op1);           // make sure op1 is first
            if (sz <= REGSIZE)
            {
                if (conste2)
                {
                    cs.IFL2 = FLconst;
                    cs.IEV2.Vint = shiftcnt;
                }
                cdb.gen(&cs);             // SHIFT EA,[CL|1]
                if (*pretregs & mPSW && !loopcnt && conste2)
                    code_orflag(cdb.last(),CFpsw);
            }
            else // TYlong
            {
                cs.Iop = 0xD1;            // plain shift
                code *ce = gennop(null);                  // ce: NOP
                if (v == 0xD3)
                {
                    getregs(cdb,mCX);
                    if (!conste2)
                    {
                        assert(loopcnt == 0);
                        genjmp(cdb,JCXZ,FLcode,cast(block *) ce);   // JCXZ ce
                    }
                }
                code *cg;
                if (oper == OPshlass)
                {
                    // left shift: lsw first, carry rotates into the msw
                    cdb.gen(&cs);            // cg: SHIFT EA
                    cg = cdb.last();
                    code_orflag(cg,CFpsw);
                    getlvalue_msw(&cs);
                    NEWREG(cs.Irm,op2);
                    cdb.gen(&cs);            // SHIFT EA
                    getlvalue_lsw(&cs);
                }
                else
                {
                    // right shift: msw first, carry rotates into the lsw
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);
                    cg = cdb.last();
                    code_orflag(cg,CFpsw);
                    NEWREG(cs.Irm,op2);
                    getlvalue_lsw(&cs);
                    cdb.gen(&cs);
                }
                if (v == 0xD3)                    // if building a loop
                {
                    genjmp(cdb,LOOP,FLcode,cast(block *) cg); // LOOP cg
                    regimmed_set(CX,0);           // note that now CX == 0
                }
                cdb.append(ce);
            }
        }

        // If we want the result, we must load it from the EA
        // into a register.

        if (sz == 2 * REGSIZE && *pretregs)
        {
            retregs = *pretregs & (ALLREGS | mBP);
            if (retregs)
            {
                retregs &= ~idxregm(&cs);
                allocreg(cdb,&retregs,&reg,tym);
                cs.Iop = LOD;

                // be careful not to trash any index regs
                // do MSW first (which can't be an index reg)
                getlvalue_msw(&cs);
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);
                getlvalue_lsw(&cs);
                reg = findreglsw(retregs);
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);
                if (*pretregs & mPSW)
                    tstresult(cdb,retregs,tyml,true);
            }
            else        // flags only
            {
                retregs = ALLREGS & ~idxregm(&cs);
                allocreg(cdb,&retregs,&reg,TYint);
                cs.Iop = LOD;
                NEWREG(cs.Irm,reg);
                cdb.gen(&cs);           // MOV reg,EA
                cs.Iop = 0x0B;          // OR reg,EA+2
                cs.Iflags |= CFpsw;
                getlvalue_msw(&cs);
                cdb.gen(&cs);
            }
        }
        if (e1.Ecount && !(retregs & regcon.mvar))  // if lvalue is a CSE
            cssave(e1,retregs,!OTleaf(e1.Eoper));
        freenode(e1);
        *pretregs = retregs;
        return;
    }
    else                                // else must evaluate in register
    {
        if (sz <= REGSIZE)
        {
            regm_t possregs = ALLREGS & ~mCX & ~idxregm(&cs);
            if (isbyte)
                possregs &= BYTEREGS;
            retregs = *pretregs & possregs;
            if (retregs == 0)
                retregs = possregs;
            allocreg(cdb,&retregs,&reg,tym);
            cs.Iop = LOD ^ isbyte;
            code_newreg(&cs, reg);
            if (isbyte && I64 && (reg >= 4))
                cs.Irex |= REX;
            cdb.gen(&cs);                     // MOV reg,EA
            if (!I16)
            {
                assert(!isbyte || (mask(reg) & BYTEREGS));
                cdb.genc2(v ^ isbyte,modregrmx(3,op1,reg),shiftcnt);
                if (isbyte && I64 && (reg >= 4))
                    cdb.last().Irex |= REX;
                code_orrex(cdb.last(), rex);
                // We can do a 32 bit shift on a 16 bit operand if
                // it's a left shift and we're not concerned about
                // the flags. Remember that flags are not set if
                // a shift of 0 occurs.
                if (_tysize[tym] == SHORTSIZE &&
                    (oper == OPshrass || oper == OPashrass ||
                     (*pretregs & mPSW && conste2)))
                     cdb.last().Iflags |= CFopsize;            // 16 bit operand
            }
            else
            {
                while (loopcnt--)
                {   // Generate shift instructions.
                    cdb.genc2(v ^ isbyte,modregrm(3,op1,reg),shiftcnt);
                }
            }
            if (*pretregs & mPSW && conste2)
            {
                assert(shiftcnt);
                *pretregs &= ~mPSW;     // result is already in flags
                code_orflag(cdb.last(),CFpsw);
            }

            opAssStoreReg(cdb,cs,e,reg,pretregs);
            return;
        }
        assert(0);
    }
}


/**********************************
 * Generate code for compares.
 * Handles lt,gt,le,ge,eqeq,ne for all data types.
 */

void cdcmp(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs,rretregs;
    reg_t reg,rreg;
    int fl;

    //printf("cdcmp(e = %p, pretregs = %s)\n",e,regm_str(*pretregs));
    // Collect extra parameter. This is pretty ugly...
2528 int flag = cdcmp_flag; 2529 cdcmp_flag = 0; 2530 2531 elem *e1 = e.EV.E1; 2532 elem *e2 = e.EV.E2; 2533 if (*pretregs == 0) // if don't want result 2534 { 2535 codelem(cdb,e1,pretregs,false); 2536 *pretregs = 0; // in case e1 changed it 2537 codelem(cdb,e2,pretregs,false); 2538 return; 2539 } 2540 2541 uint jop = jmpopcode(e); // must be computed before 2542 // leaves are free'd 2543 uint reverse = 0; 2544 2545 OPER op = e.Eoper; 2546 assert(OTrel(op)); 2547 bool eqorne = (op == OPeqeq) || (op == OPne); 2548 2549 tym_t tym = tybasic(e1.Ety); 2550 uint sz = _tysize[tym]; 2551 uint isbyte = sz == 1; 2552 2553 uint rex = (I64 && sz == 8) ? REX_W : 0; 2554 uint grex = rex << 16; // 64 bit operands 2555 2556 code cs; 2557 code *ce; 2558 if (tyfloating(tym)) // if floating operation 2559 { 2560 if (config.fpxmmregs) 2561 { 2562 retregs = mPSW; 2563 if (tyxmmreg(tym)) 2564 orthxmm(cdb,e,&retregs); 2565 else 2566 orth87(cdb,e,&retregs); 2567 } 2568 else if (config.inline8087) 2569 { retregs = mPSW; 2570 orth87(cdb,e,&retregs); 2571 } 2572 else 2573 { 2574 if (config.exe & EX_windos) 2575 { 2576 int clib; 2577 2578 retregs = 0; /* skip result for now */ 2579 if (iffalse(e2)) /* second operand is constant 0 */ 2580 { 2581 assert(!eqorne); /* should be OPbool or OPnot */ 2582 if (tym == TYfloat) 2583 { 2584 retregs = FLOATREGS; 2585 clib = CLIB.ftst0; 2586 } 2587 else 2588 { 2589 retregs = DOUBLEREGS; 2590 clib = CLIB.dtst0; 2591 } 2592 if (rel_exception(op)) 2593 clib += CLIB.dtst0exc - CLIB.dtst0; 2594 codelem(cdb,e1,&retregs,false); 2595 retregs = 0; 2596 callclib(cdb,e,clib,&retregs,0); 2597 freenode(e2); 2598 } 2599 else 2600 { 2601 clib = CLIB.dcmp; 2602 if (rel_exception(op)) 2603 clib += CLIB.dcmpexc - CLIB.dcmp; 2604 opdouble(cdb,e,&retregs,clib); 2605 } 2606 } 2607 else 2608 { 2609 assert(0); 2610 } 2611 } 2612 goto L3; 2613 } 2614 2615 /* If it's a signed comparison of longs, we have to call a library */ 2616 /* routine, because we don't know the target of the 
signed branch */ 2617 /* (have to set up flags so that jmpopcode() will do it right) */ 2618 if (!eqorne && 2619 (I16 && tym == TYlong && tybasic(e2.Ety) == TYlong || 2620 I32 && tym == TYllong && tybasic(e2.Ety) == TYllong) 2621 ) 2622 { 2623 assert(jop != JC && jop != JNC); 2624 retregs = mDX | mAX; 2625 codelem(cdb,e1,&retregs,false); 2626 retregs = mCX | mBX; 2627 scodelem(cdb,e2,&retregs,mDX | mAX,false); 2628 2629 if (I16) 2630 { 2631 retregs = 0; 2632 callclib(cdb,e,CLIB.lcmp,&retregs,0); // gross, but it works 2633 } 2634 else 2635 { 2636 /* Generate: 2637 * CMP EDX,ECX 2638 * JNE C1 2639 * XOR EDX,EDX 2640 * CMP EAX,EBX 2641 * JZ C1 2642 * JA C3 2643 * DEC EDX 2644 * JMP C1 2645 * C3: INC EDX 2646 * C1: 2647 */ 2648 getregs(cdb,mDX); 2649 genregs(cdb,0x39,CX,DX); // CMP EDX,ECX 2650 code *c1 = gennop(null); 2651 genjmp(cdb,JNE,FLcode,cast(block *)c1); // JNE C1 2652 movregconst(cdb,DX,0,0); // XOR EDX,EDX 2653 genregs(cdb,0x39,BX,AX); // CMP EAX,EBX 2654 genjmp(cdb,JE,FLcode,cast(block *)c1); // JZ C1 2655 code *c3 = gen1(null,0x40 + DX); // INC EDX 2656 genjmp(cdb,JA,FLcode,cast(block *)c3); // JA C3 2657 cdb.gen1(0x48 + DX); // DEC EDX 2658 genjmp(cdb,JMPS,FLcode,cast(block *)c1); // JMP C1 2659 cdb.append(c3); 2660 cdb.append(c1); 2661 getregs(cdb,mDX); 2662 retregs = mPSW; 2663 } 2664 goto L3; 2665 } 2666 2667 /* See if we should reverse the comparison, so a JA => JC, and JBE => JNC 2668 * (This is already reflected in the jop) 2669 */ 2670 if ((jop == JC || jop == JNC) && 2671 (op == OPgt || op == OPle) && 2672 (tyuns(tym) || tyuns(e2.Ety)) 2673 ) 2674 { // jmpopcode() sez comparison should be reversed 2675 assert(e2.Eoper != OPconst && e2.Eoper != OPrelconst); 2676 reverse ^= 2; 2677 } 2678 2679 /* See if we should swap operands */ 2680 if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2)) 2681 { 2682 e1 = e.EV.E2; 2683 e2 = e.EV.E1; 2684 reverse ^= 2; 2685 } 2686 2687 retregs = allregs; 2688 if (isbyte) 2689 retregs = BYTEREGS; 2690 2691 
ce = null; 2692 cs.Iflags = (!I16 && sz == SHORTSIZE) ? CFopsize : 0; 2693 cs.Irex = cast(ubyte)rex; 2694 if (sz > REGSIZE) 2695 ce = gennop(ce); 2696 2697 switch (e2.Eoper) 2698 { 2699 default: 2700 L2: 2701 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 2702 rretregs = allregs & ~retregs; 2703 if (isbyte) 2704 rretregs &= BYTEREGS; 2705 scodelem(cdb,e2,&rretregs,retregs,true); // get right leaf 2706 if (sz <= REGSIZE) // CMP reg,rreg 2707 { 2708 reg = findreg(retregs); // get reg that e1 is in 2709 rreg = findreg(rretregs); 2710 genregs(cdb,0x3B ^ isbyte ^ reverse,reg,rreg); 2711 code_orrex(cdb.last(), rex); 2712 if (!I16 && sz == SHORTSIZE) 2713 cdb.last().Iflags |= CFopsize; // compare only 16 bits 2714 if (I64 && isbyte && (reg >= 4 || rreg >= 4)) 2715 cdb.last().Irex |= REX; // address byte registers 2716 } 2717 else 2718 { 2719 assert(sz <= 2 * REGSIZE); 2720 2721 // Compare MSW, if they're equal then compare the LSW 2722 reg = findregmsw(retregs); 2723 rreg = findregmsw(rretregs); 2724 genregs(cdb,0x3B ^ reverse,reg,rreg); // CMP reg,rreg 2725 if (I32 && sz == 6) 2726 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2727 else if (I64) 2728 code_orrex(cdb.last(), REX_W); 2729 genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop 2730 2731 reg = findreglsw(retregs); 2732 rreg = findreglsw(rretregs); 2733 genregs(cdb,0x3B ^ reverse,reg,rreg); // CMP reg,rreg 2734 if (I64) 2735 code_orrex(cdb.last(), REX_W); 2736 } 2737 break; 2738 2739 case OPrelconst: 2740 if (I64 && (config.flags3 & CFG3pic || config.exe == EX_WIN64)) 2741 goto L2; 2742 fl = el_fl(e2); 2743 switch (fl) 2744 { 2745 case FLfunc: 2746 fl = FLextern; // so it won't be self-relative 2747 break; 2748 2749 case FLdata: 2750 case FLudata: 2751 case FLextern: 2752 if (sz > REGSIZE) // compare against DS, not DGROUP 2753 goto L2; 2754 break; 2755 2756 case FLfardata: 2757 break; 2758 2759 default: 2760 goto L2; 2761 } 2762 cs.IFL2 = cast(ubyte)fl; 2763 cs.IEV2.Vsym = e2.EV.Vsym; 2764 if (sz 
> REGSIZE) 2765 { 2766 cs.Iflags |= CFseg; 2767 cs.IEV2.Voffset = 0; 2768 } 2769 else 2770 { 2771 cs.Iflags |= CFoff; 2772 cs.IEV2.Voffset = e2.EV.Voffset; 2773 } 2774 goto L4; 2775 2776 case OPconst: 2777 // If compare against 0 2778 if (sz <= REGSIZE && *pretregs == mPSW && !boolres(e2) && 2779 isregvar(e1,&retregs,®) 2780 ) 2781 { // Just do a TEST instruction 2782 genregs(cdb,0x85 ^ isbyte,reg,reg); // TEST reg,reg 2783 cdb.last().Iflags |= (cs.Iflags & CFopsize) | CFpsw; 2784 code_orrex(cdb.last(), rex); 2785 if (I64 && isbyte && reg >= 4) 2786 cdb.last().Irex |= REX; // address byte registers 2787 retregs = mPSW; 2788 break; 2789 } 2790 2791 if (!tyuns(tym) && !tyuns(e2.Ety) && 2792 !boolres(e2) && !(*pretregs & mPSW) && 2793 (sz == REGSIZE || (I64 && sz == 4)) && 2794 (!I16 || op == OPlt || op == OPge)) 2795 { 2796 assert(*pretregs & (allregs)); 2797 codelem(cdb,e1,pretregs,false); 2798 reg = findreg(*pretregs); 2799 getregs(cdb,mask(reg)); 2800 switch (op) 2801 { 2802 case OPle: 2803 cdb.genc2(0x81,grex | modregrmx(3,0,reg),cast(uint)-1); // ADD reg,-1 2804 code_orflag(cdb.last(), CFpsw); 2805 cdb.genc2(0x81,grex | modregrmx(3,2,reg),0); // ADC reg,0 2806 goto oplt; 2807 2808 case OPgt: 2809 cdb.gen2(0xF7,grex | modregrmx(3,3,reg)); // NEG reg 2810 /* Flips the sign bit unless the value is 0 or int.min. 2811 Also sets the carry bit when the value is not 0. */ 2812 code_orflag(cdb.last(), CFpsw); 2813 cdb.genc2(0x81,grex | modregrmx(3,3,reg),0); // SBB reg,0 2814 /* Subtracts the carry bit. This turns int.min into 2815 int.max, flipping the sign bit. 2816 For other negative and positive values, subtracting 1 2817 doesn't affect the sign bit. 2818 For 0, the carry bit is not set, so this does nothing 2819 and the sign bit is not affected. */ 2820 goto oplt; 2821 2822 case OPlt: 2823 oplt: 2824 // Get the sign bit, i.e. 1 if the value is negative. 
2825 if (!I16) 2826 cdb.genc2(0xC1,grex | modregrmx(3,5,reg),sz * 8 - 1); // SHR reg,31 2827 else 2828 { /* 8088-286 do not have a barrel shifter, so use this 2829 faster sequence 2830 */ 2831 genregs(cdb,0xD1,0,reg); // ROL reg,1 2832 reg_t regi; 2833 if (reghasvalue(allregs,1,®i)) 2834 genregs(cdb,0x23,reg,regi); // AND reg,regi 2835 else 2836 cdb.genc2(0x81,modregrm(3,4,reg),1); // AND reg,1 2837 } 2838 break; 2839 2840 case OPge: 2841 genregs(cdb,0xD1,4,reg); // SHL reg,1 2842 code_orrex(cdb.last(),rex); 2843 code_orflag(cdb.last(), CFpsw); 2844 genregs(cdb,0x19,reg,reg); // SBB reg,reg 2845 code_orrex(cdb.last(),rex); 2846 if (I64) 2847 { 2848 cdb.gen2(0xFF,modregrmx(3,0,reg)); // INC reg 2849 code_orrex(cdb.last(), rex); 2850 } 2851 else 2852 cdb.gen1(0x40 + reg); // INC reg 2853 break; 2854 2855 default: 2856 assert(0); 2857 } 2858 freenode(e2); 2859 goto ret; 2860 } 2861 2862 cs.IFL2 = FLconst; 2863 if (sz == 16) 2864 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vcent.msw; 2865 else if (sz > REGSIZE) 2866 cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong); 2867 else 2868 cs.IEV2.Vsize_t = cast(targ_size_t)e2.EV.Vllong; 2869 2870 // The cmp immediate relies on sign extension of the 32 bit immediate value 2871 if (I64 && sz >= REGSIZE && cs.IEV2.Vsize_t != cast(int)cs.IEV2.Vint) 2872 goto L2; 2873 L4: 2874 cs.Iop = 0x81 ^ isbyte; 2875 2876 /* if ((e1 is data or a '*' reference) and it's not a 2877 * common subexpression 2878 */ 2879 2880 if ((e1.Eoper == OPvar && datafl[el_fl(e1)] || 2881 e1.Eoper == OPind) && 2882 !evalinregister(e1)) 2883 { 2884 getlvalue(cdb,&cs,e1,RMload); 2885 freenode(e1); 2886 if (evalinregister(e2)) 2887 { 2888 retregs = idxregm(&cs); 2889 if ((cs.Iflags & CFSEG) == CFes) 2890 retregs |= mES; // take no chances 2891 rretregs = allregs & ~retregs; 2892 if (isbyte) 2893 rretregs &= BYTEREGS; 2894 scodelem(cdb,e2,&rretregs,retregs,true); 2895 cs.Iop = 0x39 ^ isbyte ^ reverse; 2896 if (sz > REGSIZE) 2897 { 2898 rreg = findregmsw(rretregs); 2899 
cs.Irm |= modregrm(0,rreg,0); 2900 getlvalue_msw(&cs); 2901 cdb.gen(&cs); // CMP EA+2,rreg 2902 if (I32 && sz == 6) 2903 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2904 if (I64 && isbyte && rreg >= 4) 2905 cdb.last().Irex |= REX; 2906 genjmp(cdb,JNE,FLcode,cast(block *) ce); // JNE nop 2907 rreg = findreglsw(rretregs); 2908 NEWREG(cs.Irm,rreg); 2909 getlvalue_lsw(&cs); 2910 } 2911 else 2912 { 2913 rreg = findreg(rretregs); 2914 code_newreg(&cs, rreg); 2915 if (I64 && isbyte && rreg >= 4) 2916 cs.Irex |= REX; 2917 } 2918 } 2919 else 2920 { 2921 cs.Irm |= modregrm(0,7,0); 2922 if (sz > REGSIZE) 2923 { 2924 if (sz == 6) 2925 assert(0); 2926 if (e2.Eoper == OPrelconst) 2927 { cs.Iflags = (cs.Iflags & ~(CFoff | CFseg)) | CFseg; 2928 cs.IEV2.Voffset = 0; 2929 } 2930 getlvalue_msw(&cs); 2931 cdb.gen(&cs); // CMP EA+2,const 2932 if (!I16 && sz == 6) 2933 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 2934 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE nop 2935 if (e2.Eoper == OPconst) 2936 cs.IEV2.Vint = cast(int)e2.EV.Vllong; 2937 else if (e2.Eoper == OPrelconst) 2938 { // Turn off CFseg, on CFoff 2939 cs.Iflags ^= CFseg | CFoff; 2940 cs.IEV2.Voffset = e2.EV.Voffset; 2941 } 2942 else 2943 assert(0); 2944 getlvalue_lsw(&cs); 2945 } 2946 freenode(e2); 2947 } 2948 cdb.gen(&cs); 2949 break; 2950 } 2951 2952 if (evalinregister(e2) && !OTassign(e1.Eoper) && 2953 !isregvar(e1,null,null)) 2954 { 2955 regm_t m; 2956 2957 m = allregs & ~regcon.mvar; 2958 if (isbyte) 2959 m &= BYTEREGS; 2960 if (m & (m - 1)) // if more than one free register 2961 goto L2; 2962 } 2963 if ((e1.Eoper == OPstrcmp || (OTassign(e1.Eoper) && sz <= REGSIZE)) && 2964 !boolres(e2) && !evalinregister(e1)) 2965 { 2966 retregs = mPSW; 2967 scodelem(cdb,e1,&retregs,0,false); 2968 freenode(e2); 2969 break; 2970 } 2971 if (sz <= REGSIZE && !boolres(e2) && e1.Eoper == OPadd && *pretregs == mPSW) 2972 { 2973 retregs |= mPSW; 2974 scodelem(cdb,e1,&retregs,0,false); 2975 freenode(e2); 2976 break; 
2977 } 2978 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 2979 if (sz == 1) 2980 { 2981 reg = findreg(retregs & allregs); // get reg that e1 is in 2982 cs.Irm = modregrm(3,7,reg & 7); 2983 if (reg & 8) 2984 cs.Irex |= REX_B; 2985 if (e1.Eoper == OPvar && e1.EV.Voffset == 1 && e1.EV.Vsym.Sfl == FLreg) 2986 { assert(reg < 4); 2987 cs.Irm |= 4; // use upper register half 2988 } 2989 if (I64 && reg >= 4) 2990 cs.Irex |= REX; // address byte registers 2991 } 2992 else if (sz <= REGSIZE) 2993 { // CMP reg,const 2994 reg = findreg(retregs & allregs); // get reg that e1 is in 2995 rretregs = allregs & ~retregs; 2996 if (cs.IFL2 == FLconst && reghasvalue(rretregs,cs.IEV2.Vint,&rreg)) 2997 { 2998 genregs(cdb,0x3B,reg,rreg); 2999 code_orrex(cdb.last(), rex); 3000 if (!I16) 3001 cdb.last().Iflags |= cs.Iflags & CFopsize; 3002 freenode(e2); 3003 break; 3004 } 3005 cs.Irm = modregrm(3,7,reg & 7); 3006 if (reg & 8) 3007 cs.Irex |= REX_B; 3008 } 3009 else if (sz <= 2 * REGSIZE) 3010 { 3011 reg = findregmsw(retregs); // get reg that e1 is in 3012 cs.Irm = modregrm(3,7,reg); 3013 cdb.gen(&cs); // CMP reg,MSW 3014 if (I32 && sz == 6) 3015 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 3016 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE ce 3017 3018 reg = findreglsw(retregs); 3019 cs.Irm = modregrm(3,7,reg); 3020 if (e2.Eoper == OPconst) 3021 cs.IEV2.Vint = e2.EV.Vlong; 3022 else if (e2.Eoper == OPrelconst) 3023 { // Turn off CFseg, on CFoff 3024 cs.Iflags ^= CFseg | CFoff; 3025 cs.IEV2.Voffset = e2.EV.Voffset; 3026 } 3027 else 3028 assert(0); 3029 } 3030 else 3031 assert(0); 3032 cdb.gen(&cs); // CMP sucreg,LSW 3033 freenode(e2); 3034 break; 3035 3036 case OPind: 3037 if (e2.Ecount) 3038 goto L2; 3039 goto L5; 3040 3041 case OPvar: 3042 if (config.exe & (EX_OSX | EX_OSX64)) 3043 { 3044 if (movOnly(e2)) 3045 goto L2; 3046 } 3047 if ((e1.Eoper == OPvar && 3048 isregvar(e2,&rretregs,®) && 3049 sz <= REGSIZE 3050 ) || 3051 (e1.Eoper == OPind && 3052 
isregvar(e2,&rretregs,®) && 3053 !evalinregister(e1) && 3054 sz <= REGSIZE 3055 ) 3056 ) 3057 { 3058 // CMP EA,e2 3059 getlvalue(cdb,&cs,e1,RMload); 3060 freenode(e1); 3061 cs.Iop = 0x39 ^ isbyte ^ reverse; 3062 code_newreg(&cs,reg); 3063 if (I64 && isbyte && reg >= 4) 3064 cs.Irex |= REX; // address byte registers 3065 cdb.gen(&cs); 3066 freenode(e2); 3067 break; 3068 } 3069 L5: 3070 scodelem(cdb,e1,&retregs,0,true); // compute left leaf 3071 if (sz <= REGSIZE) // CMP reg,EA 3072 { 3073 reg = findreg(retregs & allregs); // get reg that e1 is in 3074 uint opsize = cs.Iflags & CFopsize; 3075 loadea(cdb,e2,&cs,0x3B ^ isbyte ^ reverse,reg,0,RMload | retregs,0); 3076 code_orflag(cdb.last(),opsize); 3077 } 3078 else if (sz <= 2 * REGSIZE) 3079 { 3080 reg = findregmsw(retregs); // get reg that e1 is in 3081 // CMP reg,EA 3082 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,REGSIZE,RMload | retregs,0); 3083 if (I32 && sz == 6) 3084 cdb.last().Iflags |= CFopsize; // seg is only 16 bits 3085 genjmp(cdb,JNE,FLcode, cast(block *) ce); // JNE ce 3086 reg = findreglsw(retregs); 3087 if (e2.Eoper == OPind) 3088 { 3089 NEWREG(cs.Irm,reg); 3090 getlvalue_lsw(&cs); 3091 cdb.gen(&cs); 3092 } 3093 else 3094 loadea(cdb,e2,&cs,0x3B ^ reverse,reg,0,RMload | retregs,0); 3095 } 3096 else 3097 assert(0); 3098 freenode(e2); 3099 break; 3100 } 3101 cdb.append(ce); 3102 3103 L3: 3104 if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register 3105 { 3106 if (config.target_cpu >= TARGET_80386 && !flag && !(jop & 0xFF00)) 3107 { 3108 regm_t resregs = retregs; 3109 if (!I64) 3110 { 3111 resregs &= BYTEREGS; 3112 if (!resregs) 3113 resregs = BYTEREGS; 3114 } 3115 allocreg(cdb,&resregs,®,TYint); 3116 cdb.gen2(0x0F90 + (jop & 0x0F),modregrmx(3,0,reg)); // SETcc reg 3117 if (I64 && reg >= 4) 3118 code_orrex(cdb.last(),REX); 3119 if (tysize(e.Ety) > 1) 3120 { 3121 genregs(cdb,MOVZXb,reg,reg); // MOVZX reg,reg 3122 if (I64 && sz == 8) 3123 code_orrex(cdb.last(),REX_W); 3124 if (I64 && reg 
>= 4) 3125 code_orrex(cdb.last(),REX); 3126 } 3127 *pretregs &= ~mPSW; 3128 fixresult(cdb,e,resregs,pretregs); 3129 } 3130 else 3131 { 3132 code *nop = null; 3133 regm_t save = regcon.immed.mval; 3134 allocreg(cdb,&retregs,®,TYint); 3135 regcon.immed.mval = save; 3136 if ((*pretregs & mPSW) == 0 && 3137 (jop == JC || jop == JNC)) 3138 { 3139 getregs(cdb,retregs); 3140 genregs(cdb,0x19,reg,reg); // SBB reg,reg 3141 if (rex || flag & REX_W) 3142 code_orrex(cdb.last(), REX_W); 3143 if (flag) 3144 { } // cdcond() will handle it 3145 else if (jop == JNC) 3146 { 3147 if (I64) 3148 { 3149 cdb.gen2(0xFF,modregrmx(3,0,reg)); // INC reg 3150 code_orrex(cdb.last(), rex); 3151 } 3152 else 3153 cdb.gen1(0x40 + reg); // INC reg 3154 } 3155 else 3156 { 3157 cdb.gen2(0xF7,modregrmx(3,3,reg)); // NEG reg 3158 code_orrex(cdb.last(), rex); 3159 } 3160 } 3161 else if (I64 && sz == 8) 3162 { 3163 assert(!flag); 3164 movregconst(cdb,reg,1,64|8); // MOV reg,1 3165 nop = gennop(nop); 3166 genjmp(cdb,jop,FLcode,cast(block *) nop); // Jtrue nop 3167 // MOV reg,0 3168 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 64|8 : 64); 3169 regcon.immed.mval &= ~mask(reg); 3170 } 3171 else 3172 { 3173 assert(!flag); 3174 movregconst(cdb,reg,1,8); // MOV reg,1 3175 nop = gennop(nop); 3176 genjmp(cdb,jop,FLcode,cast(block *) nop); // Jtrue nop 3177 // MOV reg,0 3178 movregconst(cdb,reg,0,(*pretregs & mPSW) ? 8 : 0); 3179 regcon.immed.mval &= ~mask(reg); 3180 } 3181 *pretregs = retregs; 3182 cdb.append(nop); 3183 } 3184 } 3185 ret: 3186 { } 3187 } 3188 3189 3190 /********************************** 3191 * Generate code for signed compare of longs. 
* Input:
*      targ    block* or code*
* Params:
*      cdb = code builder that the compares and jumps are appended to
*      e = relational elem; e.Eoper - OPle indexes the jump tables below
*          (table order is <=, >, <, >=). e is freed before returning.
*      jcond = when bit 0 is clear the sense of the generated jumps is toggled
*      fltarg = FL kind handed to genjmp() together with targ
*      targ = destination of the conditional jumps
*/

void longcmp(ref CodeBuilder cdb,elem *e,bool jcond,uint fltarg,code *targ)
{
                                         // <=  >   <   >=
    static immutable ubyte[4] jopmsw = [JL, JG, JL, JG ];
    static immutable ubyte[4] joplsw = [JBE, JA, JB, JAE ];

    //printf("longcmp(e = %p)\n", e);
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    OPER op = e.Eoper;

    // See if we should swap operands
    if (e1.Eoper == OPvar && e2.Eoper == OPvar && evalinregister(e2))
    {
        e1 = e.EV.E2;
        e2 = e.EV.E1;
        op = swaprel(op);
    }

    code cs;
    cs.Iflags = 0;
    cs.Irex = 0;

    code *ce = gennop(null);            // landing pad appended after the final jump
    regm_t retregs = ALLREGS;
    regm_t rretregs;
    reg_t reg,rreg;

    /* Pre-build the pair of jumps that follows the compare of the most
     * significant words: one to targ, and the JL/JG-swapped one to ce,
     * which skips the compare of the least significant words.
     * Every path below appends cdbjmp right after its MSW compare.
     */
    uint jop = jopmsw[op - OPle];
    if (!(jcond & 1)) jop ^= (JL ^ JG);                   // toggle jump condition
    CodeBuilder cdbjmp;
    cdbjmp.ctor();
    genjmp(cdbjmp,jop,fltarg, cast(block *) targ);        // Jx targ
    genjmp(cdbjmp,jop ^ (JL ^ JG),FLcode, cast(block *) ce);   // Jy nop

    switch (e2.Eoper)
    {
        default:
        L2:
            scodelem(cdb,e1,&retregs,0,true);      // compute left leaf
            rretregs = ALLREGS & ~retregs;
            scodelem(cdb,e2,&rretregs,retregs,true);     // get right leaf
            cse_flush(cdb,1);
            // Compare MSW, if they're equal then compare the LSW
            reg = findregmsw(retregs);
            rreg = findregmsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);            // CMP reg,rreg
            cdb.append(cdbjmp);

            reg = findreglsw(retregs);
            rreg = findreglsw(rretregs);
            genregs(cdb,0x3B,reg,rreg);            // CMP reg,rreg
            break;

        case OPconst:
            cs.IEV2.Vint = cast(int)MSREG(e2.EV.Vllong);            // MSW first
            cs.IFL2 = FLconst;
            cs.Iop = 0x81;

            /* if ((e1 is data or a '*' reference) and it's not a
             * common subexpression
             */

            if ((e1.Eoper == OPvar && datafl[el_fl(e1)] ||
                 e1.Eoper == OPind) &&
                !evalinregister(e1))
            {
                getlvalue(cdb,&cs,e1,0);
                freenode(e1);
                if (evalinregister(e2))
                {
                    retregs = idxregm(&cs);
                    if ((cs.Iflags & CFSEG) == CFes)
                        retregs |= mES;             // take no chances
                    rretregs = ALLREGS & ~retregs;
                    scodelem(cdb,e2,&rretregs,retregs,true);
                    cse_flush(cdb,1);
                    rreg = findregmsw(rretregs);
                    cs.Iop = 0x39;
                    cs.Irm |= modregrm(0,rreg,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);                 // CMP EA+2,rreg
                    cdb.append(cdbjmp);
                    rreg = findreglsw(rretregs);
                    NEWREG(cs.Irm,rreg);
                }
                else
                {
                    cse_flush(cdb,1);
                    cs.Irm |= modregrm(0,7,0);
                    getlvalue_msw(&cs);
                    cdb.gen(&cs);                 // CMP EA+2,const
                    cdb.append(cdbjmp);
                    cs.IEV2.Vint = e2.EV.Vlong;
                    freenode(e2);
                }
                getlvalue_lsw(&cs);
                cdb.gen(&cs);                     // CMP EA,rreg/const
                break;
            }
            if (evalinregister(e2))
                goto L2;

            scodelem(cdb,e1,&retregs,0,true);    // compute left leaf
            cse_flush(cdb,1);
            reg = findregmsw(retregs);              // get reg that e1 is in
            cs.Irm = modregrm(3,7,reg);

            cdb.gen(&cs);                         // CMP reg,MSW
            cdb.append(cdbjmp);
            reg = findreglsw(retregs);
            cs.Irm = modregrm(3,7,reg);
            cs.IEV2.Vint = e2.EV.Vlong;
            cdb.gen(&cs);                         // CMP sucreg,LSW
            freenode(e2);
            break;

        case OPvar:
            if (!e1.Ecount && e1.Eoper == OPs32_64)
            {
                /* e1 is a sign extension that is not a common subexpression:
                 * evaluate only its operand and build the upper half here
                 * with MOV + SAR.
                 */
                reg_t msreg;

                retregs = allregs;
                scodelem(cdb,e1.EV.E1,&retregs,0,true);
                freenode(e1);
                reg = findreg(retregs);
                retregs = allregs & ~retregs;
                allocreg(cdb,&retregs,&msreg,TYint);
                genmovreg(cdb,msreg,reg);                  // MOV msreg,reg
                cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
                cse_flush(cdb,1);
                loadea(cdb,e2,&cs,0x3B,msreg,REGSIZE,mask(reg),0);
                cdb.append(cdbjmp);
                loadea(cdb,e2,&cs,0x3B,reg,0,mask(reg),0);
                freenode(e2);
            }
            else
            {
                scodelem(cdb,e1,&retregs,0,true);  // compute left leaf
                cse_flush(cdb,1);
                reg = findregmsw(retregs);   // get reg that e1 is in
                loadea(cdb,e2,&cs,0x3B,reg,REGSIZE,retregs,0);
                cdb.append(cdbjmp);
                reg = findreglsw(retregs);
                loadea(cdb,e2,&cs,0x3B,reg,0,retregs,0);
                freenode(e2);
            }
            break;
    }

    // The LSW compare uses the jump from the second table (JBE/JA/JB/JAE)
    jop = joplsw[op - OPle];
    if (!(jcond & 1)) jop ^= 1;                           // toggle jump condition
    genjmp(cdb,jop,fltarg,cast(block *) targ);            // Jcond targ

    cdb.append(ce);
    freenode(e);
}

/*****************************
 * Do conversions.
 * Depends on OPd_s32 and CLIB.dbllng being in sequence.
 *
 * When config.inline8087 is set, the conversions listed in the switch below
 * are handed to the inline helpers (load87, cnvt87, xmmcnvt, cdd_u32,
 * cdd_u64, fixresult87, ...). Anything that is not handled there evaluates
 * the operand and calls the runtime library routine that the clib table
 * pairs with e.Eoper.
 * Params:
 *      cdb = code builder the generated code is appended to
 *      e = conversion elem; the operand is e.EV.E1
 *      pretregs = where the result is wanted; if it is 0 only the operand
 *                 is evaluated and no conversion is generated
 */

void cdcnvt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdcnvt: %p *pretregs = %s\n", e, regm_str(*pretregs));
    //elem_print(e);

    // Pairs of [conversion operator, library routine that implements it]
    static immutable ubyte[2][16] clib =
    [
        [ OPd_s32,        CLIB.dbllng   ],
        [ OPs32_d,        CLIB.lngdbl   ],
        [ OPd_s16,        CLIB.dblint   ],
        [ OPs16_d,        CLIB.intdbl   ],
        [ OPd_u16,        CLIB.dbluns   ],
        [ OPu16_d,        CLIB.unsdbl   ],
        [ OPd_u32,        CLIB.dblulng  ],
        [ OPu32_d,        CLIB.ulngdbl  ],
        [ OPd_s64,        CLIB.dblllng  ],
        [ OPs64_d,        CLIB.llngdbl  ],
        [ OPd_u64,        CLIB.dblullng ],
        [ OPu64_d,        CLIB.ullngdbl ],
        [ OPd_f,          CLIB.dblflt   ],
        [ OPf_d,          CLIB.fltdbl   ],
        [ OPvp_fp,        CLIB.vptrfptr ],
        [ OPcvp_fp,       CLIB.cvptrfptr]
    ];

    if (!*pretregs)
    {
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    regm_t retregs;
    if (config.inline8087)
    {
        switch (e.Eoper)
        {
            case OPld_d:
            case OPd_ld:
            {
                if (tycomplex(e.EV.E1.Ety))
                {
            Lcomplex:           // also the target of `goto Lcomplex` in case OPf_d/OPd_f
                    regm_t retregsx = mST01 | (*pretregs & mPSW);
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    fixresult_complex87(cdb, e, retregsx, pretregs);
                    return;
                }
                regm_t retregsx = mST0 | (*pretregs & mPSW);
                codelem(cdb,e.EV.E1, &retregsx, false);
                fixresult87(cdb, e, retregsx, pretregs);
                return;
            }

            case OPf_d:
            case OPd_f:
                if (tycomplex(e.EV.E1.Ety))
                    goto Lcomplex;
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }

                /* It won't do us much good to transfer back and        */
                /* forth between 8088 registers and 8087 registers      */
                if (OTcall(e.EV.E1.Eoper) && !(*pretregs & allregs))
                {
                    retregs = regmask(e.EV.E1.Ety, e.EV.E1.EV.E1.Ety);
                    if (retregs & (mXMM1 | mXMM0 |mST01 | mST0))       // if return in ST0
                    {
                        codelem(cdb,e.EV.E1,pretregs,false);
                        if (*pretregs & mST0)
                            note87(e, 0, 0);
                        return;
                    }
                    else
                        break;
                }
                goto Lload87;

            case OPs64_d:
                if (!I64)
                    goto Lload87;
                goto case OPs32_d;

            case OPs32_d:
                if (config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb, e, pretregs);
                    return;
                }
                goto Lload87;

            case OPs16_d:
            case OPu16_d:
            Lload87:
                load87(cdb,e,0,pretregs,null,-1);
                return;

            case OPu32_d:
                if (I64 && config.fpxmmregs && *pretregs & XMMREGS)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                else if (!I16)
                {
                    /* Store the operand at offset 0 and a register holding 0
                     * at offset 4, then load the two together with FILD m64int.
                     */
                    regm_t retregsx = ALLREGS;
                    codelem(cdb,e.EV.E1, &retregsx, false);
                    reg_t reg = findreg(retregsx);
                    cdb.genfltreg(STO, reg, 0);
                    regwithvalue(cdb,ALLREGS,0,&reg,0);
                    cdb.genfltreg(STO, reg, 4);

                    push87(cdb);
                    cdb.genfltreg(0xDF,5,0);     // FILD m64int

                    regm_t retregsy = mST0 /*| (*pretregs & mPSW)*/;
                    fixresult87(cdb, e, retregsy, pretregs);
                    return;
                }
                break;

            case OPd_s64:
                if (!I64)
                    goto Lcnvt87;
                goto case OPd_s32;

            case OPd_s32:
                if (config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                goto Lcnvt87;

            case OPd_s16:
            case OPd_u16:
            Lcnvt87:
                cnvt87(cdb,e,pretregs);
                return;

            case OPd_u32:               // use subroutine, not 8087
                if (I64 && config.fpxmmregs)
                {
                    xmmcnvt(cdb,e,pretregs);
                    return;
                }
                if (I32 || I64)
                {
                    cdd_u32(cdb,e,pretregs);
                    return;
                }
                if (config.exe & EX_posix)
                {
                    retregs = mST0;
                }
                else
                {
                    retregs = DOUBLEREGS;
                }
                goto L1;

            case OPd_u64:
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                retregs = DOUBLEREGS;
                goto L1;

            case OPu64_d:
                if (*pretregs & mST0)
                {
                    regm_t retregsx = I64 ? mAX : mAX|mDX;
                    codelem(cdb,e.EV.E1,&retregsx,false);
                    callclib(cdb,e,CLIB.u64_ldbl,pretregs,0);
                    return;
                }
                break;

            case OPld_u64:
            {
                if (I32 || I64)
                {
                    cdd_u64(cdb,e,pretregs);
                    return;
                }
                regm_t retregsx = mST0;
                codelem(cdb,e.EV.E1,&retregsx,false);
                callclib(cdb,e,CLIB.ld_u64,pretregs,0);
                return;
            }

            default:
                break;
        }
    }
    retregs = regmask(e.EV.E1.Ety, TYnfunc);
L1:
    // Evaluate the operand into retregs, then call the matching library routine.
    // The assert fires if e.Eoper has no entry in the table.
    codelem(cdb,e.EV.E1,&retregs,false);
    for (int i = 0; 1; i++)
    {
        assert(i < clib.length);
        if (clib[i][0] == e.Eoper)
        {
            callclib(cdb,e,clib[i][1],pretregs,0);
            break;
        }
    }
}


/***************************
 * Convert short to long.
 * For OPs16_32, OPu16_32, OPnp_fp, OPu32_64, OPs32_64,
 * OPu64_128, OPs64_128
 * Params:
 *      cdb = code builder the generated code is appended to
 *      e = conversion elem; the operand is e.EV.E1
 *      pretregs = where the result is wanted; if it names no register in
 *                 ALLREGS | mBP the operand is evaluated without converting.
 *                 mPSW is cleared from it on the paths where the flags
 *                 come from evaluating the operand.
 */

void cdshtlng(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    reg_t reg;
    regm_t retregs;

    //printf("cdshtlng(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    int e1comsub = e.EV.E1.Ecount;      // captured before the operand is evaluated
    ubyte op = e.Eoper;
    if ((*pretregs & (ALLREGS | mBP)) == 0)    // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);     // then conversion isn't necessary
        return;
    }
    else if (
             op == OPnp_fp ||
             (I16 && op == OPu16_32) ||
             (I32 && op == OPu32_64)
            )
    {
        /* Result goes into a register pair.
         * Zero extend by putting a zero into most significant reg.
         */

        regm_t retregsx = *pretregs & mLSW;
        assert(retregsx);
        tym_t tym1 = tybasic(e.EV.E1.Ety);
        codelem(cdb,e.EV.E1,&retregsx,false);

        regm_t regm = *pretregs & (mMSW & ALLREGS);
        if (regm == 0)                  // *pretregs could be mES
            regm = mMSW & ALLREGS;
        allocreg(cdb,&regm,&reg,TYint);
        if (e1comsub)
            getregs(cdb,retregsx);
        if (op == OPnp_fp)
        {
            int segreg;

            // BUG: what about pointers to functions?
            switch (tym1)
            {
                case TYimmutPtr:
                case TYnptr:    segreg = SEG_DS;        break;
                case TYcptr:    segreg = SEG_CS;        break;
                case TYsptr:    segreg = SEG_SS;        break;
                default:        assert(0);
            }
            cdb.gen2(0x8C,modregrm(3,segreg,reg));  // MOV reg,segreg
        }
        else
            movregconst(cdb,reg,0,0);  // 0 extend

        fixresult(cdb,e,retregsx | regm,pretregs);
        return;
    }
    else if (I64 && op == OPu32_64)
    {
        elem *e1 = e.EV.E1;
        retregs = *pretregs;
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            code cs;

            allocreg(cdb,&retregs,&reg,TYint);
            loadea(cdb,e1,&cs,LOD,reg,0,retregs,retregs);  //  MOV Ereg,EA
            freenode(e1);
        }
        else
        {
            *pretregs &= ~mPSW;                 // flags are set by eval of e1
            codelem(cdb,e1,&retregs,false);
            /* Determine if high 32 bits are already 0
             */
            if (e1.Eoper == OPu16_32 && !e1.Ecount)
            {
                // nothing to generate in this case
            }
            else
            {
                // Zero high 32 bits
                getregs(cdb,retregs);
                reg = findreg(retregs);
                // Don't use x89 because that will get optimized away
                genregs(cdb,LOD,reg,reg);  // MOV Ereg,Ereg
            }
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (I64 && op == OPs32_64 && OTrel(e.EV.E1.Eoper) && !e.EV.E1.Ecount)
    {
        /* Due to how e1 is calculated, the high 32 bits of the register
         * are already 0.
         */
        retregs = *pretregs;
        codelem(cdb,e.EV.E1,&retregs,false);
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (!I16 && (op == OPs16_32 || op == OPu16_32) ||
              I64 && op == OPs32_64)
    {
        elem *e11;
        elem *e1 = e.EV.E1;

        if (e1.Eoper == OPu8_16 && !e1.Ecount &&
            ((e11 = e1.EV.E1).Eoper == OPvar || (e11.Eoper == OPind && !e11.Ecount))
           )
        {
            // Operand is itself a byte zero-extension of a variable or
            // indirection: clear the register and load just the byte.
            code cs;

            retregs = *pretregs & BYTEREGS;
            if (!retregs)
                retregs = BYTEREGS;
            allocreg(cdb,&retregs,&reg,TYint);
            movregconst(cdb,reg,0,0);                   //  XOR reg,reg
            loadea(cdb,e11,&cs,0x8A,reg,0,retregs,retregs);  //  MOV regL,EA
            freenode(e11);
            freenode(e1);
        }
        else if (e1.Eoper == OPvar ||
            (e1.Eoper == OPind && !e1.Ecount))
        {
            code cs = void;

            if (I32 && op == OPu16_32 && config.flags4 & CFG4speed)
                goto L2;
            retregs = *pretregs;
            allocreg(cdb,&retregs,&reg,TYint);
            const opcode = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,EA
            if (op == OPs32_64)
            {
                assert(I64);
                // MOVSXD reg,e1
                loadea(cdb,e1,&cs,0x63,reg,0,0,retregs);
                code_orrex(cdb.last(), REX_W);
            }
            else
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregs);
            freenode(e1);
        }
        else
        {
        L2:
            retregs = *pretregs;
            if (op == OPs32_64)
                retregs = mAX | (*pretregs & mPSW);
            *pretregs &= ~mPSW;             // flags are already set
            // Generate the operand into a separate builder so that the
            // last instruction it produced can be inspected below.
            CodeBuilder cdbx;
            cdbx.ctor();
            codelem(cdbx,e1,&retregs,false);
            code *cx = cdbx.finish();
            cdb.append(cdbx);
            getregs(cdb,retregs);
            if (op == OPu16_32 && cx)
            {
                cx = code_last(cx);
                if (cx.Iop == 0x81 && (cx.Irm & modregrm(3,7,0)) == modregrm(3,4,0) &&
                    mask(cx.Irm & 7) == retregs)
                {
                    // Convert AND of a word to AND of a dword, zeroing upper word
                    if (cx.Irex & REX_B)
                        retregs = mask(8 | (cx.Irm & 7));
                    cx.Iflags &= ~CFopsize;
                    cx.IEV2.Vint &= 0xFFFF;
                    goto L1;
                }
            }
            if (op == OPs16_32 && retregs == mAX)
                cdb.gen1(0x98);         // CWDE
            else if (op == OPs32_64 && retregs == mAX)
            {
                cdb.gen1(0x98);         // CDQE
                code_orrex(cdb.last(), REX_W);
            }
            else
            {
                reg = findreg(retregs);
                if (config.flags4 & CFG4speed && op == OPu16_32)
                {   // AND reg,0xFFFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFFFFu);
                }
                else
                {
                    opcode_t iop = (op == OPu16_32) ? MOVZXw : MOVSXw; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                }
            }
         L1:
            if (e1comsub)
                getregs(cdb,retregs);
        }
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
    else if (*pretregs & mPSW || config.target_cpu < TARGET_80286)
    {
        // OPs16_32, OPs32_64
        // CWD doesn't affect flags, so we can depend on the integer
        // math to provide the flags.
        retregs = mAX | mPSW;               // want integer result in AX
        *pretregs &= ~mPSW;                 // flags are already set
        codelem(cdb,e.EV.E1,&retregs,false);
        getregs(cdb,mDX);           // sign extend into DX
        cdb.gen1(0x99);                         // CWD/CDQ
        if (e1comsub)
            getregs(cdb,retregs);
        fixresult(cdb,e,mDX | retregs,pretregs);
        return;
    }
    else
    {
        // OPs16_32, OPs32_64
        uint msreg,lsreg;

        retregs = *pretregs & mLSW;
        assert(retregs);
        codelem(cdb,e.EV.E1,&retregs,false);
        retregs |= *pretregs & mMSW;
        allocreg(cdb,&retregs,&reg,e.Ety);
        msreg = findregmsw(retregs);
        lsreg = findreglsw(retregs);
        genmovreg(cdb,msreg,lsreg);                // MOV msreg,lsreg
        assert(config.target_cpu >= TARGET_80286);              // 8088 can't handle SAR reg,imm8
        cdb.genc2(0xC1,modregrm(3,7,msreg),REGSIZE * 8 - 1);    // SAR msreg,31
        fixresult(cdb,e,retregs,pretregs);
        return;
    }
}


/***************************
 * Convert byte to int.
 * For OPu8_16 and OPs8_16.
* Params:
*      cdb = code builder the generated code is appended to
*      e = conversion elem; the operand is e.EV.E1
*      pretregs = where the result is wanted; if it names no register in
*                 ALLREGS | mBP the operand is evaluated without converting.
*                 mPSW is cleared from it on the paths where the flags
*                 are already set by the generated code.
*/

void cdbyteint(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    regm_t retregs;
    char size;                  // only assigned and read on the !I16 path

    if ((*pretregs & (ALLREGS | mBP)) == 0)     // if don't need result in regs
    {
        codelem(cdb,e.EV.E1,pretregs,false);      // then conversion isn't necessary
        return;
    }

    //printf("cdbyteint(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));
    char op = e.Eoper;
    elem *e1 = e.EV.E1;
    if (e1.Eoper == OPcomma)
        docommas(cdb,&e1);
    if (!I16)
    {
        if (e1.Eoper == OPvar || (e1.Eoper == OPind && !e1.Ecount))
        {
            // Operand can be addressed directly: extend while loading it
            code cs;

            regm_t retregsx = *pretregs;
            reg_t reg;
            allocreg(cdb,&retregsx,&reg,TYint);
            if (config.flags4 & CFG4speed &&
                op == OPu8_16 && mask(reg) & BYTEREGS &&
                config.target_cpu < TARGET_PentiumPro)
            {
                movregconst(cdb,reg,0,0);                 //  XOR reg,reg
                loadea(cdb,e1,&cs,0x8A,reg,0,retregsx,retregsx); //  MOV regL,EA
            }
            else
            {
                const opcode = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,EA
                loadea(cdb,e1,&cs,opcode,reg,0,0,retregsx);
            }
            freenode(e1);
            fixresult(cdb,e,retregsx,pretregs);
            return;
        }
        size = tysize(e.Ety);
        retregs = *pretregs & BYTEREGS;
        if (retregs == 0)
            retregs = BYTEREGS;
        retregs |= *pretregs & mPSW;
        *pretregs &= ~mPSW;
    }
    else
    {
        if (op == OPu8_16)              // if uint conversion
        {
            retregs = *pretregs & BYTEREGS;
            if (retregs == 0)
                retregs = BYTEREGS;
        }
        else
        {
            // CBW doesn't affect flags, so we can depend on the integer
            // math to provide the flags.
            retregs = mAX | (*pretregs & mPSW); // want integer result in AX
        }
    }

    // Generate the operand into its own builder so we can tell whether
    // it produced any code at all (c1 is null if it did not).
    CodeBuilder cdb1;
    cdb1.ctor();
    codelem(cdb1,e1,&retregs,false);
    code *c1 = cdb1.finish();
    cdb.append(cdb1);
    reg_t reg = findreg(retregs);
    code *c;
    if (!c1)
        goto L1;

    // If previous instruction is an AND bytereg,value
    c = cdb.last();
    if (c.Iop == 0x80 && c.Irm == modregrm(3,4,reg & 7) &&
        (op == OPu8_16 || (c.IEV2.Vuns & 0x80) == 0))
    {
        // Widen that AND in place instead of emitting a separate extension
        if (*pretregs & mPSW)
            c.Iflags |= CFpsw;
        c.Iop |= 1;                     // convert to word operation
        c.IEV2.Vuns &= 0xFF;            // dump any high order bits
        *pretregs &= ~mPSW;             // flags already set
    }
    else
    {
     L1:
        if (!I16)
        {
            if (op == OPs8_16 && reg == AX && size == 2)
            {
                cdb.gen1(0x98);                  // CBW
                cdb.last().Iflags |= CFopsize;  // don't do a CWDE
            }
            else
            {
                // We could do better by not forcing the src and dst
                // registers to be the same.

                if (config.flags4 & CFG4speed && op == OPu8_16)
                {   // AND reg,0xFF
                    cdb.genc2(0x81,modregrmx(3,4,reg),0xFF);
                }
                else
                {
                    opcode_t iop = (op == OPu8_16) ? MOVZXb : MOVSXb; // MOVZX/MOVSX reg,reg
                    genregs(cdb,iop,reg,reg);
                    if (I64 && reg >= 4)
                        code_orrex(cdb.last(), REX);
                }
            }
        }
        else
        {
            if (op == OPu8_16)
                genregs(cdb,0x30,reg+4,reg+4);  // XOR regH,regH
            else
            {
                cdb.gen1(0x98);                 // CBW
                *pretregs &= ~mPSW;             // flags already set
            }
        }
    }
    getregs(cdb,retregs);
    fixresult(cdb,e,retregs,pretregs);
}


/***************************
 * Convert long to short (OP32_16).
 * Get offset of far pointer (OPoffset).
 * Convert int to byte (OP16_8).
 * Convert long long to long (OP64_32).
* OP128_64
 */

void cdlngsht(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    debug
    {
        // Halt on any operator this routine does not handle
        switch (e.Eoper)
        {
            case OP32_16, OPoffset, OP16_8, OP64_32, OP128_64:
                break;

            default:
                assert(0);
        }
    }

    regm_t lowregs;
    if (e.Eoper == OP16_8)
    {
        // Narrowing to a byte: operand must land in a byte-addressable register
        lowregs = *pretregs ? BYTEREGS : 0;
        codelem(cdb,e.EV.E1,&lowregs,false);
    }
    else if (e.EV.E1.Eoper == OPrelconst)
    {
        offsetinreg(cdb,e.EV.E1,&lowregs);
    }
    else
    {
        lowregs = *pretregs ? ALLREGS : 0;
        codelem(cdb,e.EV.E1,&lowregs,false);

        const bool isOffset = e.Eoper == OPoffset;
        const bool wantLswOnly =
            I16 ||
            (I32 && (isOffset || e.Eoper == OP64_32)) ||
            (I64 && (isOffset || e.Eoper == OP128_64));
        if (wantLswOnly)
            lowregs &= mLSW;            // keep only the least significant word
    }

    /* Giving a register the result of a new elem counts as "destroying" it,
     * even though the value is unchanged: the CSE bookkeeping lets a register
     * hold the contents of only one elem at a time.
     */
    if (!e.Ecount)
        useregs(lowregs);
    else
        getregs(cdb,lowregs);

    debug
    {
        if (*pretregs && !lowregs)
        {
            WROP(e.Eoper);
            printf(" *pretregs = %s, retregs = %s, e = %p\n",regm_str(*pretregs),regm_str(lowregs),e);
        }
    }

    assert(!*pretregs || lowregs);
    fixresult(cdb,e,lowregs,pretregs);  // result is the lsw only
}

/**********************************************
 * Get top 32 bits of 64 bit value (I32)
 * or top 16 bits of 32 bit value (I16)
 * or top 64 bits of 128 bit value (I64).
 * OPmsw
 */

void cdmsw(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    assert(e.Eoper == OPmsw);

    // Evaluate the whole operand, then narrow the mask to its upper half
    regm_t highregs = 0;
    if (*pretregs)
        highregs = ALLREGS;
    codelem(cdb,e.EV.E1,&highregs,false);
    highregs &= mMSW;                   // keep only the most significant word

    /* Giving a register the result of a new elem counts as "destroying" it,
     * even though the value is unchanged: the CSE bookkeeping lets a register
     * hold the contents of only one elem at a time.
     */
    if (!e.Ecount)
        useregs(highregs);
    else
        getregs(cdb,highregs);

    debug
    {
        if (*pretregs && !highregs)
        {
            WROP(e.Eoper);
            printf(" *pretregs = %s, retregs = %s\n",regm_str(*pretregs),regm_str(highregs));
            elem_print(e);
        }
    }

    assert(!*pretregs || highregs);
    fixresult(cdb,e,highregs,pretregs); // result is the msw only
}



/******************************
 * Handle operators OPinp and OPoutp.
 */

void cdport(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdport\n");
    ubyte opcode = 0xE4;                // root of all IN/OUT opcodes
    ubyte portNum = 0;                  // stays 0 when the port comes from DX
    elem *portElem = e.EV.E1;

    // The immediate form of IN/OUT can only encode a constant port 0..255
    const bool immediatePort =
        portElem.Eoper == OPconst && portElem.EV.Vuns <= 255 &&
        (!evalinregister(portElem) || regcon.mvar & mDX);

    if (immediatePort)
    {
        portNum = cast(ubyte)portElem.EV.Vuns;
        freenode(portElem);
    }
    else
    {
        regm_t dxOnly = mDX;            // port number is always DX
        codelem(cdb,portElem,&dxOnly,false);
        opcode |= 0x08;                 // DX version of opcode
    }

    uint sz;
    if (e.Eoper != OPoutp)              // OPinp
    {
        getregs(cdb,mAX);
        sz = tysize(e.Ety);
    }
    else
    {
        sz = tysize(e.EV.E2.Ety);
        regm_t valueRegs = mAX;         // byte/word to output is in AL/AX
        // Keep DX intact while loading the value if DX holds the port
        scodelem(cdb,e.EV.E2,&valueRegs,(immediatePort ? 0 : mDX),true);
        opcode |= 0x02;                 // OUT opcode
    }

    if (sz != 1)
        opcode |= 1;                    // word operation
    cdb.genc2(opcode,0,portNum);        // IN/OUT AL/AX,DX/port
    if (opcode & 1 && sz != REGSIZE)    // operand size differs from default
        cdb.last().Iflags |= CFopsize;

    regm_t resultRegs = mAX;
    fixresult(cdb,e,resultRegs,pretregs);
}

/************************
 * Generate code for an asm elem.
*/

void cdasm(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    // Assume only regs normally destroyed by a function are destroyed
    getregs(cdb,(ALLREGS | mES) & ~fregsaved);
    // Hand the elem's string payload (pointer + length) to the builder
    cdb.genasm(cast(char *)e.EV.Vstring, cast(uint) e.EV.Vstrlen);
    // Result is reported in DX:AX for 16 bit code, AX otherwise
    fixresult(cdb,e,(I16 ? mDX | mAX : mAX),pretregs);
}

/************************
 * Generate code for OPnp_f16p and OPf16p_np.
 *
 * NOTE(review): as written here, the jump target `cnop` created on the
 * OPnp_f16p path is never appended to cdb, the trailing ROL of the sequence
 * sketched in the comments is not emitted, and fixresult() is never called.
 * Confirm whether this routine is still reachable before relying on it.
 */

void cdfar16(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    code *cnop;
    code cs;

    assert(I32);
    codelem(cdb,e.EV.E1,pretregs,false);
    reg_t reg = findreg(*pretregs);
    getregs(cdb,*pretregs);      // we will destroy the regs

    // Template instruction: shift/rotate group 0xC1 on reg with an imm8 count
    cs.Iop = 0xC1;
    cs.Irm = modregrm(3,0,reg);
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.IFL2 = FLconst;
    cs.IEV2.Vuns = 16;

    cdb.gen(&cs);                       // ROL ereg,16
    cs.Irm |= modregrm(0,1,0);
    cdb.gen(&cs);                       // ROR ereg,16
    cs.IEV2.Vuns = 3;
    cs.Iflags |= CFopsize;

    if (e.Eoper == OPnp_f16p)
    {
        /*      OR  ereg,ereg
                JE  L1
                ROR ereg,16
                SHL reg,3
                MOV rx,SS
                AND rx,3                ;mask off CPL bits
                OR  rl,4                ;run on LDT bit
                OR  regl,rl
                ROL ereg,16
            L1: NOP
         */
        reg_t rx;

        regm_t retregs = BYTEREGS & ~*pretregs;
        allocreg(cdb,&retregs,&rx,TYint);
        cnop = gennop(null);
        int jop = JCXZ;
        if (reg != CX)
        {
            gentstreg(cdb,reg);
            jop = JE;
        }
        genjmp(cdb,jop,FLcode, cast(block *)cnop);      // Jop L1
        NEWREG(cs.Irm,4);
        cdb.gen(&cs);                                   // SHL reg,3
        genregs(cdb,0x8C,2,rx);                         // MOV rx,SS
        int isbyte = (mask(reg) & BYTEREGS) == 0;
        cdb.genc2(0x80 | isbyte,modregrm(3,4,rx),3);    // AND rl,3
        cdb.genc2(0x80,modregrm(3,1,rx),4);             // OR  rl,4
        genregs(cdb,0x0A | isbyte,reg,rx);              // OR  regl,rl
    }
    else // OPf16p_np
    {
        /*      ROR ereg,16
                SHR reg,3
                ROL ereg,16
         */

        cs.Irm |= modregrm(0,5,0);
        cdb.gen(&cs);                                   // SHR reg,3
        cnop = null;
    }
}

/*************************
 * Generate code for OPbtst
 *
 * Emits a BT of e.EV.E1 by the bit index e.EV.E2, then, if a register result
 * is requested, materializes the carry flag into a register.
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = OPbtst elem
 *      pretregs = requested result registers/flags; set to the registers
 *                 actually used when a register result is produced
 */

void cdbtst(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    regm_t retregs;
    reg_t reg;

    //printf("cdbtst(e = %p, *pretregs = %s\n", e, regm_str(*pretregs));

    opcode_t op = 0xA3;                 // BT EA,value
    int mode = 4;

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    if (*pretregs == 0)                   // if don't want result
    {
        codelem(cdb,e1,pretregs,false);   // eval left leaf
        *pretregs = 0;                    // in case they got set
        codelem(cdb,e2,pretregs,false);
        return;
    }

    regm_t idxregs;
    if ((e1.Eoper == OPind && !e1.Ecount) || e1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e1, RMload);    // get addressing mode
        idxregs = idxregm(&cs);             // mask if index regs used
    }
    else
    {
        // Evaluate e1 into a register and address that register directly
        retregs = tysize(e1.Ety) == 1 ? BYTEREGS : allregs;
        codelem(cdb,e1, &retregs, false);
        reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
        idxregs = retregs;
    }

    tym_t ty1 = tybasic(e1.Ety);
    const sz = tysize(e1.Ety);
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // Reduce the constant bit index modulo the operand width
        if (sz <= SHORTSIZE)
        {
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (sz == 4)
        {
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        retregs = ALLREGS & ~idxregs;

        /* A register variable may not have its upper 32
         * bits 0, so pick a different register to force
         * a MOV which will clear it
         */
        if (I64 && sz == 8 && tysize(e2.Ety) == 4)
        {
            regm_t rregm;
            if (isregvar(e2, &rregm, null))
                retregs &= ~rregm;
        }

        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (I64 && _tysize[ty1] == 8)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // allocreg() must not disturb the immediate-value tracking here
            regm_t save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                movregconst(cdb,reg,1,8);                   // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}

/*************************
 * Generate code for OPbt, OPbtc, OPbtr, OPbts
 *
 * The addressing mode is taken from e itself; e.EV.E2 supplies the bit index.
 * Params:
 *      cdb      = code builder that receives the generated instructions
 *      e        = OPbt/OPbtc/OPbtr/OPbts elem
 *      pretregs = requested result registers/flags; set to the registers
 *                 actually used when a register result is produced
 */

void cdbt(ref CodeBuilder cdb,elem *e, regm_t *pretregs)
{
    //printf("cdbt(%p, %s)\n", e, regm_str(*pretregs));
    regm_t retregs;
    reg_t reg;
    opcode_t op;
    int mode;

    // op is the low opcode byte of the rm,reg form; mode is the /digit of the rm,imm8 form
    switch (e.Eoper)
    {
        case OPbt:      op = 0xA3; mode = 4; break;
        case OPbtc:     op = 0xBB; mode = 7; break;
        case OPbtr:     op = 0xB3; mode = 6; break;
        case OPbts:     op = 0xAB; mode = 5; break;

        default:
            assert(0);
    }

    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    code cs;
    cs.Iflags = 0;

    getlvalue(cdb, &cs, e, RMload);      // get addressing mode
    if (e.Eoper == OPbt && *pretregs == 0)
    {
        // A plain bit test with no wanted result only needs e2 evaluated
        codelem(cdb,e2,pretregs,false);
        return;
    }

    const ty1 = tybasic(e1.Ety);
    const ty2 = tybasic(e2.Ety);
    ubyte word = (!I16 && _tysize[ty1] == SHORTSIZE) ? CFopsize : 0;
    regm_t idxregs = idxregm(&cs);         // mask if index regs used

//    if (e2.Eoper == OPconst && e2.EV.Vuns < 0x100)  // should do this instead?
    if (e2.Eoper == OPconst)
    {
        cs.Iop = 0x0FBA;                         // BT rm,imm8
        cs.Irm |= modregrm(0,mode,0);
        cs.Iflags |= CFpsw | word;
        cs.IFL2 = FLconst;
        // Fold the whole-operand part of the bit index into the EA offset
        // and keep only the bit position within the operand as the imm8.
        if (_tysize[ty1] == SHORTSIZE)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~15) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 15;
        }
        else if (_tysize[ty1] == 4)
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~31) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 31;
        }
        else
        {
            cs.IEV1.Voffset += (e2.EV.Vuns & ~63) >> 3;
            cs.IEV2.Vint = e2.EV.Vint & 63;
            if (I64)
                cs.Irex |= REX_W;
        }
        cdb.gen(&cs);
    }
    else
    {
        retregs = ALLREGS & ~idxregs;
        scodelem(cdb,e2,&retregs,idxregs,true);
        reg = findreg(retregs);

        cs.Iop = 0x0F00 | op;                     // BT rm,reg
        code_newreg(&cs,reg);
        cs.Iflags |= CFpsw | word;
        if (_tysize[ty2] == 8 && I64)
            cs.Irex |= REX_W;
        cdb.gen(&cs);
    }

    if ((retregs = (*pretregs & (ALLREGS | mBP))) != 0) // if return result in register
    {
        // tysize() masks off the type modifier bits before indexing the
        // size table, matching what cdbtst does for the same check.
        if (tysize(e.Ety) == 1)
        {
            assert(I64 || retregs & BYTEREGS);
            allocreg(cdb,&retregs,&reg,TYint);
            cdb.gen2(0x0F92,modregrmx(3,0,reg));        // SETC reg
            if (I64 && reg >= 4)
                code_orrex(cdb.last(), REX);
            *pretregs = retregs;
        }
        else
        {
            code *cnop = null;
            // allocreg() must not disturb the immediate-value tracking here
            const save = regcon.immed.mval;
            allocreg(cdb,&retregs,&reg,TYint);
            regcon.immed.mval = save;
            if ((*pretregs & mPSW) == 0)
            {
                getregs(cdb,retregs);
                genregs(cdb,0x19,reg,reg);                  // SBB reg,reg
                cdb.gen2(0xF7,modregrmx(3,3,reg));          // NEG reg
            }
            else
            {
                movregconst(cdb,reg,1,8);                   // MOV reg,1
                cnop = gennop(null);
                genjmp(cdb,JC,FLcode, cast(block *) cnop);  // Jtrue nop
                                                            // MOV reg,0
                movregconst(cdb,reg,0,8);
                regcon.immed.mval &= ~mask(reg);
            }
            *pretregs = retregs;
            cdb.append(cnop);
        }
    }
}

/*************************************
 * Generate code for OPbsf and
OPbsr.
 */

void cdbscan(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    //printf("cdbscan()\n");
    //elem_print(e);
    if (!*pretregs)
    {
        // Result not wanted: just evaluate the operand
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const tyml = tybasic(e.EV.E1.Ety);
    const sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || sz == 8);
    code cs = void;

    if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e.EV.E1, RMload);     // get addressing mode
    }
    else
    {
        // Evaluate the operand into a register and address that register
        regm_t retregs = allregs;
        codelem(cdb,e.EV.E1, &retregs, false);
        const reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
    }

    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t reg;
    allocreg(cdb,&retregs, &reg, e.Ety);

    cs.Iop = (e.Eoper == OPbsf) ? 0x0FBC : 0x0FBD;        // BSF/BSR reg,EA
    code_newreg(&cs, reg);
    if (!I16 && sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);

    fixresult(cdb,e,retregs,pretregs);
}

/************************
 * OPpopcnt operator
 */

void cdpopcnt(ref CodeBuilder cdb,elem *e,regm_t *pretregs)
{
    //printf("cdpopcnt()\n");
    //elem_print(e);
    assert(!I16);
    if (!*pretregs)
    {
        // Result not wanted: just evaluate the operand
        codelem(cdb,e.EV.E1,pretregs,false);
        return;
    }

    const tyml = tybasic(e.EV.E1.Ety);

    const sz = _tysize[tyml];
    assert(sz == 2 || sz == 4 || (sz == 8 && I64));     // no byte op

    code cs = void;
    if ((e.EV.E1.Eoper == OPind && !e.EV.E1.Ecount) || e.EV.E1.Eoper == OPvar)
    {
        getlvalue(cdb, &cs, e.EV.E1, RMload);     // get addressing mode
    }
    else
    {
        // Evaluate the operand into a register and address that register
        regm_t retregs = allregs;
        codelem(cdb,e.EV.E1, &retregs, false);
        const reg = findreg(retregs);
        cs.Irm = modregrm(3,0,reg & 7);
        cs.Iflags = 0;
        cs.Irex = 0;
        if (reg & 8)
            cs.Irex |= REX_B;
    }

    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t reg;
    allocreg(cdb,&retregs, &reg, e.Ety);

    cs.Iop = POPCNT;            // POPCNT reg,EA
    code_newreg(&cs, reg);
    if (sz == SHORTSIZE)
        cs.Iflags |= CFopsize;
    if (*pretregs & mPSW)
        cs.Iflags |= CFpsw;
    cdb.gen(&cs);
    if (sz == 8)
        code_orrex(cdb.last(), REX_W);
    *pretregs &= mBP | ALLREGS;             // flags already set

    fixresult(cdb,e,retregs,pretregs);
}


/*******************************************
 * Generate code for OPpair, OPrpair.
 *
 * Each operand is evaluated into its own half of the result: E1 into regs1
 * and E2 into regs2. For OPrpair the two halves are exchanged first.
 */

void cdpair(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    if (*pretregs == 0)                         // if don't want result
    {
        codelem(cdb,e.EV.E1,pretregs,false);    // eval left leaf
        *pretregs = 0;                          // in case they got set
        codelem(cdb,e.EV.E2,pretregs,false);
        return;
    }

    //printf("\ncdpair(e = %p, *pretregs = %s)\n", e, regm_str(*pretregs));
    //printf("Ecount = %d\n", e.Ecount);

    regm_t retregs = *pretregs;
    // Only flags wanted for a complex value: pick where to materialize it
    if (retregs == mPSW && tycomplex(e.Ety) && config.inline8087)
    {
        if (config.fpxmmregs)
            retregs |= mXMM0 | mXMM1;
        else
            retregs |= mST01;
    }

    if (retregs & mST01)
    {
        loadPair87(cdb, e, pretregs);
        return;
    }

    regm_t regs1;
    regm_t regs2;
    if (retregs & XMMREGS)
    {
        // Take two distinct XMM registers out of the candidates
        retregs &= XMMREGS;
        const reg = findreg(retregs);
        regs1 = mask(reg);
        regs2 = mask(findreg(retregs & ~regs1));
    }
    else
    {
        retregs &= allregs;
        if (!retregs)
            retregs = allregs;
        regs1 = retregs & mLSW;
        regs2 = retregs & mMSW;
    }
    if (e.Eoper == OPrpair)
    {
        // swap
        regs1 ^= regs2;
        regs2 ^= regs1;
        regs1 ^= regs2;
    }
    //printf("1: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));

    codelem(cdb,e.EV.E1, &regs1, false);
    scodelem(cdb,e.EV.E2, &regs2, regs1, false);    // must not disturb regs1
    //printf("2: regs1 = %s, regs2 = %s\n", regm_str(regs1), regm_str(regs2));

    if (e.EV.E1.Ecount)
        getregs(cdb,regs1);
    if (e.EV.E2.Ecount)
        getregs(cdb,regs2);

    fixresult(cdb,e,regs1 | regs2,pretregs);
}

/*************************
 * Generate code for OPcmpxchg
 */

void cdcmpxchg(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* The form is:
     *     OPcmpxchg
     *    /     \
     * lvalue   OPparam
     *          /     \
     *        old     new
     */

    //printf("cdcmpxchg(e=%p, *pretregs = %s)\n",e,regm_str(*pretregs));
    elem *e1 = e.EV.E1;
    elem *e2 = e.EV.E2;
    assert(e2.Eoper == OPparam);
    assert(!e2.Ecount);

    const tyml = tybasic(e1.Ety);                   // type of lvalue
    const sz = _tysize[tyml];

    if (I32 && sz == 8)
    {
        // 64 bit operand on a 32 bit target: use CMPXCHG8B with fixed registers
        regm_t retregsx = mDX|mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);          // [DX,AX] = e2.EV.E1

        regm_t retregs = mCX|mBX;
        scodelem(cdb,e2.EV.E2,&retregs,mDX|mAX,false);  // [CX,BX] = e2.EV.E2

        code cs = void;
        getlvalue(cdb,&cs,e1,mCX|mBX|mAX|mDX);          // get EA

        getregs(cdb,mDX|mAX);                 // CMPXCHG destroys these regs

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);                   // LOCK prefix
        cs.Iop = 0x0FC7;                      // CMPXCHG8B EA
        cs.Iflags |= CFpsw;
        code_newreg(&cs,1);
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }
    else
    {
        const uint isbyte = (sz == 1);            // 1 for byte operation
        const ubyte word = (!I16 && sz == SHORTSIZE) ? CFopsize : 0;
        const uint rex = (I64 && sz == 8) ? REX_W : 0;

        regm_t retregsx = mAX;
        codelem(cdb,e2.EV.E1,&retregsx,false);        // AX = e2.EV.E1

        regm_t retregs = (ALLREGS | mBP) & ~mAX;
        scodelem(cdb,e2.EV.E2,&retregs,mAX,false);    // load rvalue in reg

        code cs = void;
        getlvalue(cdb,&cs,e1,mAX | retregs);          // get EA

        getregs(cdb,mAX);                 // CMPXCHG destroys AX

        if (e1.Ety & mTYvolatile)
            cdb.gen1(LOCK);               // LOCK prefix
        cs.Iop = 0x0FB1 ^ isbyte;         // CMPXCHG EA,reg
        cs.Iflags |= CFpsw | word;
        cs.Irex |= rex;
        const reg = findreg(retregs);
        code_newreg(&cs,reg);
        cdb.gen(&cs);

        assert(!e1.Ecount);
        freenode(e1);
    }

    if (regm_t retregs = *pretregs & (ALLREGS | mBP)) // if return result in register
    {
        assert(tysize(e.Ety) == 1);
        assert(I64 || retregs & BYTEREGS);
        reg_t reg;
        allocreg(cdb,&retregs,&reg,TYint);
        uint ea = modregrmx(3,0,reg);
        if (I64 && reg >= 4)
            ea |= REX << 16;
        cdb.gen2(0x0F94,ea);        // SETZ reg
        *pretregs = retregs;
    }
}

/*************************
 * Generate code for OPprefetch
 */

void cdprefetch(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    /* Generate the following based on e2:
     *    0: prefetch0
     *    1: prefetch1
     *    2: prefetch2
     *    3: prefetchnta
     *    4: prefetchw
     *    5: prefetchwt1
     */
    //printf("cdprefetch\n");
    elem *e1 = e.EV.E1;

    assert(*pretregs == 0);
    assert(e.EV.E2.Eoper == OPconst);
    opcode_t op;
    reg_t reg;          // value for the reg field of the mod/rm byte, selects the variant
    switch (e.EV.E2.EV.Vuns)
    {
        case 0: op = PREFETCH; reg = 1; break;  // PREFETCH0
        case 1: op = PREFETCH; reg = 2; break;  // PREFETCH1
        case 2: op = PREFETCH; reg = 3; break;  // PREFETCH2
        case 3: op = PREFETCH; reg = 0; break;  // PREFETCHNTA
        case 4: op = 0x0F0D;   reg = 1; break;  // PREFETCHW
        case 5: op = 0x0F0D;   reg = 2; break;  // PREFETCHWT1
        default: assert(0);
    }

    freenode(e.EV.E2);

    code cs = void;
    getlvalue(cdb,&cs,e1,0);
    cs.Iop = op;
    cs.Irm |= modregrm(0,reg,0);
    cs.Iflags |= CFvolatile;        // do not schedule
    cdb.gen(&cs);
}


/*********************
 * Load register from EA of assignment operation.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that will be evaluated
 *      reg = set to register loaded from EA
 *      retregs = register candidates for reg
 */
private
void opAssLoadReg(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t reg, regm_t retregs)
{
    modEA(cdb, &cs);
    allocreg(cdb,&retregs,&reg,TYoffset);

    cs.Iop = LOD;
    code_newreg(&cs,reg);
    cdb.gen(&cs);                        // MOV reg,EA
}

/*********************
 * Load register pair from EA of assignment operation.
 * Params:
 *      cdb = store generated code here
 *      cs = instruction with EA already set in it
 *      e = assignment expression that will be evaluated
 *      rhi = set to most significant register of the pair
 *      rlo = set to least significant register of the pair
 *      retregs = register candidates for rhi, rlo
 *      keepmsk = registers to not modify
 */
private
void opAssLoadPair(ref CodeBuilder cdb, ref code cs, elem* e, out reg_t rhi, out reg_t rlo, regm_t retregs, regm_t keepmsk)
{
    getlvalue(cdb,&cs,e.EV.E1,retregs | keepmsk);
    const tym_t tyml = tybasic(e.EV.E1.Ety);              // type of lvalue
    reg_t reg;
    allocreg(cdb,&retregs,&reg,tyml);

    rhi = findregmsw(retregs);
    rlo = findreglsw(retregs);

    cs.Iop = LOD;
    code_newreg(&cs,rlo);
    cdb.gen(&cs);                   // MOV rlo,EA
    getlvalue_msw(&cs);
    code_newreg(&cs,rhi);
    cdb.gen(&cs);                   // MOV rhi,EA+2
    getlvalue_lsw(&cs);             // leave cs addressing the low half again
}


/*********************************************************
 * Store register result of assignment operation EA.
* Params:
 *      cdb = builder that receives the generated code
 *      cs = instruction whose EA has already been filled in
 *      e = the assignment expression that was evaluated
 *      reg = register holding the result
 *      pretregs = where the caller wants the result
 */
private
void opAssStoreReg(ref CodeBuilder cdb, ref code cs, elem* e, reg_t reg, regm_t* pretregs)
{
    elem* lvalue = e.EV.E1;
    const regm_t resultMask = mask(reg);

    // The byte form of the store opcode differs from STO in its low bit
    const bool byteOp = _tysize[tybasic(lvalue.Ety)] == 1;
    cs.Iop = STO ^ (byteOp ? 1 : 0);
    code_newreg(&cs,reg);
    cdb.gen(&cs);                           // MOV EA,resreg

    // Record the register as holding the lvalue when it is a CSE
    if (lvalue.Ecount)
        cssave(lvalue,resultMask,!OTleaf(lvalue.Eoper));
    freenode(lvalue);
    fixresult(cdb,e,resultMask,pretregs);
}

/*********************************************************
 * Store register pair result of assignment operation EA.
 * Params:
 *      cdb = builder that receives the generated code
 *      cs = instruction whose EA has already been filled in
 *      e = the assignment expression that was evaluated
 *      rhi = register holding the most significant half
 *      rlo = register holding the least significant half
 *      pretregs = where the caller wants the result
 */
private
void opAssStorePair(ref CodeBuilder cdb, ref code cs, elem* e, reg_t rhi, reg_t rlo, regm_t* pretregs)
{
    // Low half first, at the EA as given
    cs.Iop = STO;
    code_newreg(&cs,rlo);
    cdb.gen(&cs);                   // MOV EA,lsreg

    // Then the high half, with the EA advanced to the most significant word
    code_newreg(&cs,rhi);
    getlvalue_msw(&cs);
    cdb.gen(&cs);                   // MOV EA+REGSIZE,msreg

    elem* lvalue = e.EV.E1;
    const regm_t pairMask = mask(rhi) | mask(rlo);

    // Record the pair as holding the lvalue when it is a CSE
    if (lvalue.Ecount)
        cssave(lvalue,pairMask,!OTleaf(lvalue.Eoper));
    freenode(lvalue);
    fixresult(cdb,e,pairMask,pretregs);
}


}