1 /** 2 * Constants and data structures specific to the x86 platform. 3 * 4 * Copyright: Copyright (C) 1985-1998 by Symantec 5 * Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved 6 * Authors: $(LINK2 http://www.digitalmars.com, Walter Bright) 7 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 8 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/code_x86.d, backend/code_x86.d) 9 * Documentation: https://dlang.org/phobos/dmd_backend_code_x86.html 10 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/code_x86.d 11 */ 12 13 module dmd.backend.code_x86; 14 15 // Online documentation: https://dlang.org/phobos/dmd_backend_code_x86.html 16 17 import dmd.backend.cdef; 18 import dmd.backend.cc : config; 19 import dmd.backend.code; 20 import dmd.backend.codebuilder : CodeBuilder; 21 import dmd.backend.el : elem; 22 import dmd.backend.ty : I64; 23 import dmd.backend.barray; 24 25 nothrow: 26 27 alias opcode_t = uint; // CPU opcode 28 enum opcode_t NoOpcode = 0xFFFF; // not a valid opcode_t 29 30 /* Register definitions */ 31 32 enum 33 { 34 AX = 0, 35 CX = 1, 36 DX = 2, 37 BX = 3, 38 SP = 4, 39 BP = 5, 40 SI = 6, 41 DI = 7, 42 43 // #defining R12-R15 interfere with setjmps' _JUMP_BUFFER members 44 45 R8 = 8, 46 R9 = 9, 47 R10 = 10, 48 R11 = 11, 49 R12 = 12, 50 R13 = 13, 51 R14 = 14, 52 R15 = 15, 53 54 XMM0 = 16, 55 XMM1 = 17, 56 XMM2 = 18, 57 XMM3 = 19, 58 XMM4 = 20, 59 XMM5 = 21, 60 XMM6 = 22, 61 XMM7 = 23, 62 /* There are also XMM8..XMM14 */ 63 XMM15 = 31, 64 } 65 66 bool isXMMreg(reg_t reg) pure { return reg >= XMM0 && reg <= XMM15; } 67 68 enum PICREG = BX; 69 70 enum ES = 24; 71 72 enum NUMGENREGS = 16; 73 74 // fishy naming as it covers XMM7 but not XMM15 75 // currently only used as a replacement for mES in cgcod.c 76 enum NUMREGS = 25; 77 78 enum PSW = 25; 79 enum STACK = 26; // top of stack 80 enum ST0 = 27; // 8087 top of stack register 81 enum ST01 = 28; // top two 8087 registers; for complex types 82 83 enum reg_t NOREG = 29; // no register 84 85 enum 86 { 87 AL = 0, 88 CL = 1, 89 DL = 2, 90 BL = 3, 91 AH = 4, 92 CH = 5, 93 DH = 6, 94 BH = 7, 95 } 96 97 enum 98 { 99 mAX = 1, 100 mCX = 2, 101 mDX = 4, 102 mBX = 8, 103 mSP = 0x10, 104 mBP = 0x20, 105 mSI = 0x40, 106 mDI = 0x80, 107 108 mR8 = (1 << R8), 109 mR9 = (1 << R9), 110 mR10 = (1 << R10), 111 mR11 = (1 << R11), 112 mR12 = (1 << R12), 113 mR13 = (1 << R13), 114 mR14 = (1 << R14), 115 mR15 = (1 << R15), 116 117 mXMM0 = (1 << XMM0), 118 mXMM1 = (1 << XMM1), 119 mXMM2 = (1 << XMM2), 120 mXMM3 = (1 << XMM3), 121 mXMM4 = (1 << XMM4), 122 mXMM5 = (1 << XMM5), 123 mXMM6 = (1 << XMM6), 124 mXMM7 = (1 << XMM7), 125 XMMREGS = (mXMM0 |mXMM1 |mXMM2 |mXMM3 |mXMM4 |mXMM5 |mXMM6 |mXMM7), 126 127 mES = (1 << ES), // 0x1000000 128 mPSW = (1 << PSW), // 0x2000000 129 130 mSTACK = (1 << STACK), // 0x4000000 131 132 mST0 = (1 << ST0), // 0x20000000 133 mST01 = (1 << ST01), // 0x40000000 134 } 135 136 // Flags for getlvalue (must fit in regm_t) 137 enum RMload = (1 << 30); 138 enum RMstore = (1 << 31); 139 140 extern (C++) extern __gshared regm_t ALLREGS; 141 extern (C++) extern __gshared regm_t BYTEREGS; 142 143 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 144 { 145 // To support positional independent code, 146 // must be able to remove BX from available registers 147 enum ALLREGS_INIT = (mAX|mBX|mCX|mDX|mSI|mDI); 148 enum ALLREGS_INIT_PIC = (mAX|mCX|mDX|mSI|mDI); 149 enum BYTEREGS_INIT = (mAX|mBX|mCX|mDX); 150 enum BYTEREGS_INIT_PIC = (mAX|mCX|mDX); 151 } 152 else 153 { 154 enum ALLREGS_INIT = (mAX|mBX|mCX|mDX|mSI|mDI); 155 enum BYTEREGS_INIT = (mAX|mBX|mCX|mDX); 156 } 157 158 159 /* We use the same IDXREGS for the 386 as the 8088, because if 160 we used ALLREGS, it would interfere with mMSW 161 */ 162 enum IDXREGS = (mBX|mSI|mDI); 163 164 enum FLOATREGS_64 = mAX; 165 enum FLOATREGS2_64 = mDX; 166 enum DOUBLEREGS_64 = mAX; 167 enum DOUBLEREGS2_64 = mDX; 168 169 enum FLOATREGS_32 = mAX; 170 enum FLOATREGS2_32 = mDX; 171 enum DOUBLEREGS_32 = (mAX|mDX); 172 enum DOUBLEREGS2_32 = (mCX|mBX); 173 174 enum FLOATREGS_16 = (mDX|mAX); 175 enum FLOATREGS2_16 = (mCX|mBX); 176 enum DOUBLEREGS_16 = (mAX|mBX|mCX|mDX); 177 178 /*#define _8087REGS (mST0|mST1|mST2|mST3|mST4|mST5|mST6|mST7)*/ 179 180 /* Segment registers */ 181 enum 182 { 183 SEG_ES = 0, 184 SEG_CS = 1, 185 SEG_SS = 2, 186 SEG_DS = 3, 187 } 188 189 /********************* 190 * Masks for register pairs. 191 * Note that index registers are always LSWs. This is for the convenience 192 * of implementing far pointers. 193 */ 194 195 static if (0) 196 { 197 // Give us an extra one so we can enregister a long 198 enum mMSW = mCX|mDX|mDI|mES; // most significant regs 199 enum mLSW = mAX|mBX|mSI; // least significant regs 200 } 201 else 202 { 203 enum mMSW = mCX|mDX|mES; // most significant regs 204 enum mLSW = mAX|mBX|mSI|mDI; // least significant regs 205 } 206 207 /* Return !=0 if there is a SIB byte */ 208 uint issib(uint rm) { return (rm & 7) == 4 && (rm & 0xC0) != 0xC0; } 209 210 static if (0) 211 { 212 // relocation field size is always 32bits 213 //enum is32bitaddr(x,Iflags) (1) 214 } 215 else 216 { 217 // 218 // is32bitaddr works correctly only when x is 0 or 1. This is 219 // true today for the current definition of I32, but if the definition 220 // of I32 changes, this macro will need to change as well 221 // 222 // Note: even for linux targets, CFaddrsize can be set by the inline 223 // assembler. 224 bool is32bitaddr(bool x,code_flags_t Iflags) { return I64 || (x ^ ((Iflags & CFaddrsize) !=0)); } 225 } 226 227 228 /********************** 229 * C library routines. 230 * See callclib(). 231 */ 232 233 enum CLIB 234 { 235 lcmp, 236 lmul, 237 ldiv, 238 lmod, 239 uldiv, 240 ulmod, 241 242 dmul,ddiv,dtst0,dtst0exc,dcmp,dcmpexc,dneg,dadd,dsub, 243 fmul,fdiv,ftst0,ftst0exc,fcmp,fcmpexc,fneg,fadd,fsub, 244 245 dbllng,lngdbl,dblint,intdbl, 246 dbluns,unsdbl, 247 dblulng, 248 ulngdbl, 249 dblflt,fltdbl, 250 dblllng, 251 llngdbl, 252 dblullng, 253 ullngdbl, 254 dtst, 255 vptrfptr,cvptrfptr, 256 257 _87topsw,fltto87,dblto87,dblint87,dbllng87, 258 ftst, 259 fcompp, 260 ftest, 261 ftest0, 262 fdiv87, 263 264 // Complex numbers 265 cmul, 266 cdiv, 267 ccmp, 268 269 u64_ldbl, 270 ld_u64, 271 MAX 272 } 273 274 alias code_flags_t = uint; 275 enum 276 { 277 CFes = 1, // generate an ES: segment override for this instr 278 CFjmp16 = 2, // need 16 bit jump offset (long branch) 279 CFtarg = 4, // this code is the target of a jump 280 CFseg = 8, // get segment of immediate value 281 CFoff = 0x10, // get offset of immediate value 282 CFss = 0x20, // generate an SS: segment override (not with 283 // CFes at the same time, though!) 284 CFpsw = 0x40, // we need the flags result after this instruction 285 CFopsize = 0x80, // prefix with operand size 286 CFaddrsize = 0x100, // prefix with address size 287 CFds = 0x200, // need DS override (not with ES, SS, or CS ) 288 CFcs = 0x400, // need CS override 289 CFfs = 0x800, // need FS override 290 CFgs = CFcs | CFfs, // need GS override 291 CFwait = 0x1000, // If I32 it indicates when to output a WAIT 292 CFselfrel = 0x2000, // if self-relative 293 CFunambig = 0x4000, // indicates cannot be accessed by other addressing 294 // modes 295 CFtarg2 = 0x8000, // like CFtarg, but we can't optimize this away 296 CFvolatile = 0x10000, // volatile reference, do not schedule 297 CFclassinit = 0x20000, // class init code 298 CFoffset64 = 0x40000, // offset is 64 bits 299 CFpc32 = 0x80000, // I64: PC relative 32 bit fixup 300 301 CFvex = 0x100000, // vex prefix 302 CFvex3 = 0x200000, // 3 byte vex prefix 303 304 CFjmp5 = 0x400000, // always a 5 byte jmp 305 CFswitch = 0x800000, // kludge for switch table fixups 306 307 CFindirect = 0x1000000, // OSX32: indirect fixups 308 309 /* These are for CFpc32 fixups, they're the negative of the offset of the fixup 310 * from the program counter 311 */ 312 CFREL = 0x7000000, 313 314 CFSEG = CFes | CFss | CFds | CFcs | CFfs | CFgs, 315 CFPREFIX = CFSEG | CFopsize | CFaddrsize, 316 } 317 318 struct code 319 { 320 code *next; 321 code_flags_t Iflags; 322 323 union 324 { 325 opcode_t Iop; 326 struct Svex 327 { 328 nothrow: 329 align(1): 330 ubyte op; 331 332 // [R X B m-mmmm] [W vvvv L pp] 333 ushort _pp; 334 335 @property ushort pp() const { return _pp & 3; } 336 @property void pp(ushort v) { _pp = (_pp & ~3) | (v & 3); } 337 338 @property ushort l() const { return (_pp >> 2) & 1; } 339 @property void l(ushort v) { _pp = cast(ushort)((_pp & ~4) | ((v & 1) << 2)); } 340 341 @property ushort vvvv() const { return (_pp >> 3) & 0x0F; } 342 @property void vvvv(ushort v) { _pp = cast(ushort)((_pp & ~0x78) | ((v & 0x0F) << 3)); } 343 344 @property ushort w() const { return (_pp >> 7) & 1; } 345 @property void w(ushort v) { _pp = cast(ushort)((_pp & ~0x80) | ((v & 1) << 7)); } 346 347 @property ushort mmmm() const { return (_pp >> 8) & 0x1F; } 348 @property void mmmm(ushort v) { _pp = cast(ushort)((_pp & ~0x1F00) | ((v & 0x1F) << 8)); } 349 350 @property ushort b() const { return (_pp >> 13) & 1; } 351 @property void b(ushort v) { _pp = cast(ushort)((_pp & ~0x2000) | ((v & 1) << 13)); } 352 353 @property ushort x() const { return (_pp >> 14) & 1; } 354 @property void x(ushort v) { _pp = cast(ushort)((_pp & ~0x4000) | ((v & 1) << 14)); } 355 356 @property ushort r() const { return (_pp >> 15) & 1; } 357 @property void r(ushort v) { _pp = cast(ushort)((_pp & ~0x8000) | (v << 15)); } 358 359 ubyte pfx; // always 0xC4 360 } 361 Svex Ivex; 362 } 363 364 /* The _EA is the "effective address" for the instruction, and consists of the modregrm byte, 365 * the sib byte, and the REX prefix byte. The 16 bit code generator just used the modregrm, 366 * the 32 bit x86 added the sib, and the 64 bit one added the rex. 367 */ 368 union 369 { 370 uint Iea; 371 struct 372 { 373 ubyte Irm; // reg/mode 374 ubyte Isib; // SIB byte 375 ubyte Irex; // REX prefix 376 } 377 } 378 379 /* IFL1 and IEV1 are the first operand, which usually winds up being the offset to the Effective 380 * Address. IFL1 is the tag saying which variant type is in IEV1. IFL2 and IEV2 is the second 381 * operand, usually for immediate instructions. 382 */ 383 384 ubyte IFL1,IFL2; // FLavors of 1st, 2nd operands 385 evc IEV1; // 1st operand, if any 386 evc IEV2; // 2nd operand, if any 387 388 nothrow: 389 void orReg(uint reg) 390 { if (reg & 8) 391 Irex |= REX_R; 392 Irm |= modregrm(0, reg & 7, 0); 393 } 394 395 void setReg(uint reg) 396 { 397 Irex &= ~REX_R; 398 Irm &= cast(ubyte)~cast(uint)modregrm(0, 7, 0); 399 orReg(reg); 400 } 401 402 bool isJumpOP() { return Iop == JMP || Iop == JMPS; } 403 404 extern (C++) void print() // pretty-printer 405 { 406 code_print(&this); 407 } 408 } 409 410 extern (C) void code_print(code*); 411 412 /******************* 413 * Some instructions. 414 */ 415 416 enum 417 { 418 SEGES = 0x26, 419 SEGCS = 0x2E, 420 SEGSS = 0x36, 421 SEGDS = 0x3E, 422 SEGFS = 0x64, 423 SEGGS = 0x65, 424 425 CMP = 0x3B, 426 CALL = 0xE8, 427 JMP = 0xE9, // Intra-Segment Direct 428 JMPS = 0xEB, // JMP SHORT 429 JCXZ = 0xE3, 430 LOOP = 0xE2, 431 LES = 0xC4, 432 LEA = 0x8D, 433 LOCK = 0xF0, 434 INT3 = 0xCC, 435 HLT = 0xF4, 436 ENTER = 0xC8, 437 LEAVE = 0xC9, 438 MOVSXb = 0x0FBE, 439 MOVSXw = 0x0FBF, 440 MOVZXb = 0x0FB6, 441 MOVZXw = 0x0FB7, 442 443 STOSB = 0xAA, 444 STOS = 0xAB, 445 446 STO = 0x89, 447 LOD = 0x8B, 448 449 JO = 0x70, 450 JNO = 0x71, 451 JC = 0x72, 452 JB = 0x72, 453 JNC = 0x73, 454 JAE = 0x73, 455 JE = 0x74, 456 JNE = 0x75, 457 JBE = 0x76, 458 JA = 0x77, 459 JS = 0x78, 460 JNS = 0x79, 461 JP = 0x7A, 462 JNP = 0x7B, 463 JL = 0x7C, 464 JGE = 0x7D, 465 JLE = 0x7E, 466 JG = 0x7F, 467 468 UD2 = 0x0F0B, 469 PAUSE = 0xF390, // aka REP NOP 470 471 // NOP is used as a placeholder in the linked list of instructions, no 472 // actual code will be generated for it. 473 NOP = SEGCS, // don't use 0x90 because the 474 // Windows stuff wants to output 0x90's 475 476 ASM = SEGSS, // string of asm bytes 477 478 ESCAPE = SEGDS, // marker that special information is here 479 // (Iop2 is the type of special information) 480 } 481 482 483 enum ESCAPEmask = 0xFF; // code.Iop & ESCAPEmask ==> actual Iop 484 485 enum 486 { 487 ESClinnum = (1 << 8), // line number information 488 ESCctor = (2 << 8), // object is constructed 489 ESCdtor = (3 << 8), // object is destructed 490 ESCmark = (4 << 8), // mark eh stack 491 ESCrelease = (5 << 8), // release eh stack 492 ESCoffset = (6 << 8), // set code offset for eh 493 ESCadjesp = (7 << 8), // adjust ESP by IEV2.Vint 494 ESCmark2 = (8 << 8), // mark eh stack 495 ESCrelease2 = (9 << 8), // release eh stack 496 ESCframeptr = (10 << 8), // replace with load of frame pointer 497 ESCdctor = (11 << 8), // D object is constructed 498 ESCddtor = (12 << 8), // D object is destructed 499 ESCadjfpu = (13 << 8), // adjust fpustackused by IEV2.Vint 500 ESCfixesp = (14 << 8), // reset ESP to end of local frame 501 } 502 503 /********************************* 504 * Macros to ease generating code 505 * modregrm: generate mod reg r/m field 506 * modregxrm: reg could be R8..R15 507 * modregrmx: rm could be R8..R15 508 * modregxrmx: reg or rm could be R8..R15 509 * NEWREG: change reg field of x to r 510 * genorreg: OR t,f 511 */ 512 513 ubyte modregrm (uint m, uint r, uint rm) { return cast(ubyte)((m << 6) | (r << 3) | rm); } 514 uint modregxrm (uint m, uint r, uint rm) { return ((r&8)<<15)|modregrm(m,r&7,rm); } 515 uint modregrmx (uint m, uint r, uint rm) { return ((rm&8)<<13)|modregrm(m,r,rm&7); } 516 uint modregxrmx(uint m, uint r, uint rm) { return ((r&8)<<15)|((rm&8)<<13)|modregrm(m,r&7,rm&7); } 517 518 void NEWREXR(ref ubyte x, uint r) { x = (x&~REX_R)|((r&8)>>1); } 519 void NEWREG (ref ubyte x, uint r) { x = cast(ubyte)((x & ~(7 << 3)) | (r << 3)); } 520 void code_newreg(code* c, uint r) { NEWREG(c.Irm,r&7); NEWREXR(c.Irex,r); } 521 522 //#define genorreg(c,t,f) genregs((c),0x09,(f),(t)) 523 524 enum 525 { 526 REX = 0x40, // REX prefix byte, OR'd with the following bits: 527 REX_W = 8, // 0 = default operand size, 1 = 64 bit operand size 528 REX_R = 4, // high bit of reg field of modregrm 529 REX_X = 2, // high bit of sib index reg 530 REX_B = 1, // high bit of rm field, sib base reg, or opcode reg 531 } 532 533 uint VEX2_B1(code.Svex ivex) 534 { 535 return 536 ivex.r << 7 | 537 ivex.vvvv << 3 | 538 ivex.l << 2 | 539 ivex.pp; 540 } 541 542 uint VEX3_B1(code.Svex ivex) 543 { 544 return 545 ivex.r << 7 | 546 ivex.x << 6 | 547 ivex.b << 5 | 548 ivex.mmmm; 549 } 550 551 uint VEX3_B2(code.Svex ivex) 552 { 553 return 554 ivex.w << 7 | 555 ivex.vvvv << 3 | 556 ivex.l << 2 | 557 ivex.pp; 558 } 559 560 bool ADDFWAIT() { return config.target_cpu <= TARGET_80286; } 561 562 /************************************ 563 */ 564 565 extern (C++): 566 567 struct NDP 568 { 569 elem *e; // which elem is stored here (NULL if none) 570 uint offset; // offset from e (used for complex numbers) 571 } 572 573 struct Globals87 574 { 575 NDP[8] stack; // 8087 stack 576 int stackused = 0; // number of items on the 8087 stack 577 578 Barray!NDP save; // 8087 values spilled to memory 579 } 580 581 extern (C++) extern __gshared Globals87 global87; 582 583 void getlvalue_msw(code *); 584 void getlvalue_lsw(code *); 585 void getlvalue(ref CodeBuilder cdb, code *pcs, elem *e, regm_t keepmsk); 586 void loadea(ref CodeBuilder cdb, elem *e, code *cs, uint op, uint reg, targ_size_t offset, regm_t keepmsk, regm_t desmsk); 587