/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1985-1998 by Symantec
 *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/code_x86.d, backend/code_x86.d)
 */

module dmd.backend.code_x86;

// Online documentation: https://dlang.org/phobos/dmd_backend_code_x86.html

import dmd.backend.cdef;
import dmd.backend.cc : config;
import dmd.backend.code;
import dmd.backend.codebuilder : CodeBuilder;
import dmd.backend.el : elem;
import dmd.backend.ty : I64;
import dmd.backend.barray;

nothrow:

alias opcode_t = uint;                  // CPU opcode
enum opcode_t NoOpcode = 0xFFFF;        // not a valid opcode_t

/* Register definitions */

/* Register numbers:
 *   0..7   AX..DI
 *   8..15  R8..R15
 *   16..23 XMM0..XMM7
 *   31     XMM15
 */
enum
{
    AX      = 0,
    CX      = 1,
    DX      = 2,
    BX      = 3,
    SP      = 4,
    BP      = 5,
    SI      = 6,
    DI      = 7,

    // #defining R12-R15 interfere with setjmps' _JUMP_BUFFER members

    R8      = 8,
    R9      = 9,
    R10     = 10,
    R11     = 11,
    R12     = 12,
    R13     = 13,
    R14     = 14,
    R15     = 15,

    XMM0    = 16,
    XMM1    = 17,
    XMM2    = 18,
    XMM3    = 19,
    XMM4    = 20,
    XMM5    = 21,
    XMM6    = 22,
    XMM7    = 23,
    /* There are also XMM8..XMM14 */
    XMM15   = 31,
}

/**
 * Test whether a register number lies in the range XMM0..XMM15 (inclusive).
 *
 * NOTE(review): ES, PSW, STACK, ST0, ST01 and NOREG are numbered 24..29 and
 * therefore fall inside this numeric range as well, so this returns true for
 * them too. Presumably callers only pass general purpose or XMM register
 * numbers - confirm before relying on it for other values.
 *
 * Params:
 *      reg = register number
 * Returns:
 *      true if XMM0 <= reg <= XMM15
 */
bool isXMMreg(reg_t reg) pure { return reg >= XMM0 && reg <= XMM15; }

enum PICREG = BX;

enum ES     = 24;

enum NUMGENREGS = 16;

// fishy naming as it covers XMM7 but not XMM15
// currently only used as a replacement for mES in cgcod.c
enum NUMREGS = 25;

enum PSW    = 25;
enum STACK  = 26;       // top of stack
enum ST0    = 27;       // 8087 top of stack register
enum ST01   = 28;       // top two 8087 registers; for complex types

enum reg_t NOREG = 29;  // no register

/* Numbers of the 8 bit registers */
enum
{
    AL      = 0,
    CL      = 1,
    DL      = 2,
    BL      = 3,
    AH      = 4,
    CH      = 5,
    DH      = 6,
    BH      = 7,
}

/* Register masks: the mask for register number N is (1 << N) */
enum
{
    mAX     = 1,
    mCX     = 2,
    mDX     = 4,
    mBX     = 8,
    mSP     = 0x10,
    mBP     = 0x20,
    mSI     = 0x40,
    mDI     = 0x80,

    mR8     = (1 << R8),
    mR9     = (1 << R9),
    mR10    = (1 << R10),
    mR11    = (1 << R11),
    mR12    = (1 << R12),
    mR13    = (1 << R13),
    mR14    = (1 << R14),
    mR15    = (1 << R15),

    mXMM0   = (1 << XMM0),
    mXMM1   = (1 << XMM1),
    mXMM2   = (1 << XMM2),
    mXMM3   = (1 << XMM3),
    mXMM4   = (1 << XMM4),
    mXMM5   = (1 << XMM5),
    mXMM6   = (1 << XMM6),
    mXMM7   = (1 << XMM7),
    XMMREGS = (mXMM0 |mXMM1 |mXMM2 |mXMM3 |mXMM4 |mXMM5 |mXMM6 |mXMM7),

    mES     = (1 << ES),        // 0x1000000
    mPSW    = (1 << PSW),       // 0x2000000

    mSTACK  = (1 << STACK),     // 0x4000000

    mST0    = (1 << ST0),       // 0x8000000
    mST01   = (1 << ST01),      // 0x10000000
}

// Flags for getlvalue (must fit in regm_t)
enum RMload  = (1 << 30);
enum RMstore = (1 << 31);

extern (C++) extern __gshared regm_t ALLREGS;
extern (C++) extern __gshared regm_t BYTEREGS;

static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
{
    // To support position independent code,
    // must be able to remove BX from available registers
    // (the _PIC variants are the same masks without mBX; PICREG is BX)
    enum ALLREGS_INIT      = (mAX|mBX|mCX|mDX|mSI|mDI);
    enum ALLREGS_INIT_PIC  = (mAX|mCX|mDX|mSI|mDI);
    enum BYTEREGS_INIT     = (mAX|mBX|mCX|mDX);
    enum BYTEREGS_INIT_PIC = (mAX|mCX|mDX);
}
else
{
    enum ALLREGS_INIT      = (mAX|mBX|mCX|mDX|mSI|mDI);
    enum BYTEREGS_INIT     = (mAX|mBX|mCX|mDX);
}


/* We use the same IDXREGS for the 386 as the 8088, because if
   we used ALLREGS, it would interfere with mMSW
 */
enum IDXREGS         = (mBX|mSI|mDI);

enum FLOATREGS_64    = mAX;
enum FLOATREGS2_64   = mDX;
enum DOUBLEREGS_64   = mAX;
enum DOUBLEREGS2_64  = mDX;

enum FLOATREGS_32    = mAX;
enum FLOATREGS2_32   = mDX;
enum DOUBLEREGS_32   = (mAX|mDX);
enum DOUBLEREGS2_32  = (mCX|mBX);

enum FLOATREGS_16    = (mDX|mAX);
enum FLOATREGS2_16   = (mCX|mBX);
enum DOUBLEREGS_16   = (mAX|mBX|mCX|mDX);

/*#define _8087REGS (mST0|mST1|mST2|mST3|mST4|mST5|mST6|mST7)*/

/* Segment registers */
enum
{
    SEG_ES  = 0,
    SEG_CS  = 1,
    SEG_SS  = 2,
    SEG_DS  = 3,
}

/*********************
 * Masks for register pairs.
 * Note that index registers are always LSWs. This is for the convenience
 * of implementing far pointers.
 */

static if (0)
{
    // Give us an extra one so we can enregister a long
    enum mMSW = mCX|mDX|mDI|mES;    // most significant regs
    enum mLSW = mAX|mBX|mSI;        // least significant regs
}
else
{
    enum mMSW = mCX|mDX|mES;        // most significant regs
    enum mLSW = mAX|mBX|mSI|mDI;    // least significant regs
}

/**
 * Return !=0 if there is a SIB byte.
 *
 * That is the case when the low three bits of rm are 4 and the top two
 * bits (0xC0) are not both set.
 *
 * Params:
 *      rm = mod/reg/rm byte
 * Returns:
 *      1 if a SIB byte follows, 0 otherwise
 */
uint issib(uint rm) { return (rm & 7) == 4 && (rm & 0xC0) != 0xC0; }

static if (0)
{
    // relocation field size is always 32bits
    //enum is32bitaddr(x,Iflags) (1)
}
else
{
    //
    // is32bitaddr works correctly only when x is 0 or 1.  This is
    // true today for the current definition of I32, but if the definition
    // of I32 changes, this macro will need to change as well
    //
    // Note: even for linux targets, CFaddrsize can be set by the inline
    // assembler.
    //
    // Result is true when I64 is set, otherwise x XOR "CFaddrsize is set in Iflags".
    bool is32bitaddr(bool x,code_flags_t Iflags) { return I64 || (x ^ ((Iflags & CFaddrsize) !=0)); }
}


/**********************
 * C library routines.
 * See callclib().
 */

enum CLIB
{
    lcmp,
    lmul,
    ldiv,
    lmod,
    uldiv,
    ulmod,

    dmul,ddiv,dtst0,dtst0exc,dcmp,dcmpexc,dneg,dadd,dsub,
    fmul,fdiv,ftst0,ftst0exc,fcmp,fcmpexc,fneg,fadd,fsub,

    dbllng,lngdbl,dblint,intdbl,
    dbluns,unsdbl,
    dblulng,
    ulngdbl,
    dblflt,fltdbl,
    dblllng,
    llngdbl,
    dblullng,
    ullngdbl,
    dtst,
    vptrfptr,cvptrfptr,

    _87topsw,fltto87,dblto87,dblint87,dbllng87,
    ftst,
    fcompp,
    ftest,
    ftest0,
    fdiv87,

    // Complex numbers
    cmul,
    cdiv,
    ccmp,

    u64_ldbl,
    ld_u64,
    MAX         // last member, i.e. the number of routines listed above
}

/* Bit flags stored in code.Iflags */
alias code_flags_t = uint;
enum
{
    CFes        =        1,     // generate an ES: segment override for this instr
    CFjmp16     =        2,     // need 16 bit jump offset (long branch)
    CFtarg      =        4,     // this code is the target of a jump
    CFseg       =        8,     // get segment of immediate value
    CFoff       =     0x10,     // get offset of immediate value
    CFss        =     0x20,     // generate an SS: segment override (not with
                                // CFes at the same time, though!)
    CFpsw       =     0x40,     // we need the flags result after this instruction
    CFopsize    =     0x80,     // prefix with operand size
    CFaddrsize  =    0x100,     // prefix with address size
    CFds        =    0x200,     // need DS override (not with ES, SS, or CS )
    CFcs        =    0x400,     // need CS override
    CFfs        =    0x800,     // need FS override
    CFgs        = CFcs | CFfs,  // need GS override
    CFwait      =   0x1000,     // If I32 it indicates when to output a WAIT
    CFselfrel   =   0x2000,     // if self-relative
    CFunambig   =   0x4000,     // indicates cannot be accessed by other addressing
                                // modes
    CFtarg2     =   0x8000,     // like CFtarg, but we can't optimize this away
    CFvolatile  =  0x10000,     // volatile reference, do not schedule
    CFclassinit =  0x20000,     // class init code
    CFoffset64  =  0x40000,     // offset is 64 bits
    CFpc32      =  0x80000,     // I64: PC relative 32 bit fixup

    CFvex       =  0x100000,    // vex prefix
    CFvex3      =  0x200000,    // 3 byte vex prefix

    CFjmp5      =  0x400000,    // always a 5 byte jmp
    CFswitch    =  0x800000,    // kludge for switch table fixups

    CFindirect  = 0x1000000,    // OSX32: indirect fixups

    /* These are for CFpc32 fixups, they're the negative of the offset of the fixup
     * from the program counter
     */
    CFREL       = 0x7000000,

    CFSEG       = CFes | CFss | CFds | CFcs | CFfs | CFgs,  // all segment override flags
    CFPREFIX    = CFSEG | CFopsize | CFaddrsize,            // all prefix flags
}

/**
 * One instruction in a singly linked list (linked through `next`).
 */
struct code
{
    code *next;
    code_flags_t Iflags;

    /* The opcode. Ivex overlays Iop: with align(1) the Svex fields
     * (1 + 2 + 1 bytes) take the same 4 bytes as opcode_t.
     */
    union
    {
        opcode_t Iop;
        struct Svex
        {
          nothrow:
          align(1):
            ubyte  op;

            // Bit fields packed into _pp, from bit 15 down to bit 0:
            // [R X B m-mmmm] [W vvvv L pp]
            ushort _pp;

            // bits 1..0
            @property ushort pp() const { return _pp & 3; }
            @property void pp(ushort v) { _pp = cast(ushort)((_pp & ~3) | (v & 3)); }

            // bit 2
            @property ushort l() const { return (_pp >> 2) & 1; }
            @property void l(ushort v) { _pp = cast(ushort)((_pp & ~4) | ((v & 1) << 2)); }

            // bits 6..3
            @property ushort vvvv() const { return (_pp >> 3) & 0x0F; }
            @property void vvvv(ushort v) { _pp = cast(ushort)((_pp & ~0x78) | ((v & 0x0F) << 3)); }

            // bit 7
            @property ushort w() const { return (_pp >> 7) & 1; }
            @property void w(ushort v) { _pp = cast(ushort)((_pp & ~0x80) | ((v & 1) << 7)); }

            // bits 12..8
            @property ushort mmmm() const { return (_pp >> 8) & 0x1F; }
            @property void mmmm(ushort v) { _pp = cast(ushort)((_pp & ~0x1F00) | ((v & 0x1F) << 8)); }

            // bit 13
            @property ushort b() const { return (_pp >> 13) & 1; }
            @property void b(ushort v) { _pp = cast(ushort)((_pp & ~0x2000) | ((v & 1) << 13)); }

            // bit 14
            @property ushort x() const { return (_pp >> 14) & 1; }
            @property void x(ushort v) { _pp = cast(ushort)((_pp & ~0x4000) | ((v & 1) << 14)); }

            // bit 15; only the low bit of v is used, like the other 1 bit setters
            @property ushort r() const { return (_pp >> 15) & 1; }
            @property void r(ushort v) { _pp = cast(ushort)((_pp & ~0x8000) | ((v & 1) << 15)); }

            ubyte  pfx; // always 0xC4
        }
        Svex Ivex;
    }

    /* The _EA is the "effective address" for the instruction, and consists of the modregrm byte,
     * the sib byte, and the REX prefix byte. The 16 bit code generator just used the modregrm,
     * the 32 bit x86 added the sib, and the 64 bit one added the rex.
     */
    union
    {
        uint Iea;
        struct
        {
            ubyte Irm;          // reg/mode
            ubyte Isib;         // SIB byte
            ubyte Irex;         // REX prefix
        }
    }

    /* IFL1 and IEV1 are the first operand, which usually winds up being the offset to the Effective
     * Address. IFL1 is the tag saying which variant type is in IEV1. IFL2 and IEV2 is the second
     * operand, usually for immediate instructions.
     */

    ubyte IFL1,IFL2;    // FLavors of 1st, 2nd operands
    evc IEV1;           // 1st operand, if any
    evc IEV2;           // 2nd operand, if any

  nothrow:

    /* OR register number reg into the reg field of Irm;
     * bit 3 of reg goes into REX_R of Irex.
     */
    void orReg(uint reg)
    {
        if (reg & 8)
            Irex |= REX_R;
        Irm |= modregrm(0, reg & 7, 0);
    }

    /* Clear REX_R and the reg field of Irm, then store reg there via orReg() */
    void setReg(uint reg)
    {
        Irex &= ~REX_R;
        Irm &= cast(ubyte)~cast(uint)modregrm(0, 7, 0);
        orReg(reg);
    }

    /* True if Iop is JMP or JMPS */
    bool isJumpOP() { return Iop == JMP || Iop == JMPS; }

    extern (C++) void print()           // pretty-printer
    {
        code_print(&this);
    }
}

extern (C) void code_print(code*);

/*******************
 * Some instructions.
 */

enum
{
    SEGES   = 0x26,
    SEGCS   = 0x2E,
    SEGSS   = 0x36,
    SEGDS   = 0x3E,
    SEGFS   = 0x64,
    SEGGS   = 0x65,

    CMP     = 0x3B,
    CALL    = 0xE8,
    JMP     = 0xE9,     // Intra-Segment Direct
    JMPS    = 0xEB,     // JMP SHORT
    JCXZ    = 0xE3,
    LOOP    = 0xE2,
    LES     = 0xC4,
    LEA     = 0x8D,
    LOCK    = 0xF0,
    INT3    = 0xCC,
    HLT     = 0xF4,

    STO     = 0x89,
    LOD     = 0x8B,

    JO      = 0x70,
    JNO     = 0x71,
    JC      = 0x72,
    JB      = 0x72,
    JNC     = 0x73,
    JAE     = 0x73,
    JE      = 0x74,
    JNE     = 0x75,
    JBE     = 0x76,
    JA      = 0x77,
    JS      = 0x78,
    JNS     = 0x79,
    JP      = 0x7A,
    JNP     = 0x7B,
    JL      = 0x7C,
    JGE     = 0x7D,
    JLE     = 0x7E,
    JG      = 0x7F,

    UD2     = 0x0F0B,

    // NOP is used as a placeholder in the linked list of instructions, no
    // actual code will be generated for it.
    NOP     = SEGCS,    // don't use 0x90 because the
                        // Windows stuff wants to output 0x90's

    ASM     = SEGSS,    // string of asm bytes

    ESCAPE  = SEGDS,    // marker that special information is here
                        // (Iop2 is the type of special information)
                        // NOTE(review): struct code has no Iop2 field; presumably
                        // the ESCxxx values, which occupy bits 8 and up, are
                        // combined into Iop itself - confirm
}


enum ESCAPEmask = 0xFF; // code.Iop & ESCAPEmask ==> actual Iop

enum
{
    ESClinnum   = (1 << 8),      // line number information
    ESCctor     = (2 << 8),      // object is constructed
    ESCdtor     = (3 << 8),      // object is destructed
    ESCmark     = (4 << 8),      // mark eh stack
    ESCrelease  = (5 << 8),      // release eh stack
    ESCoffset   = (6 << 8),      // set code offset for eh
    ESCadjesp   = (7 << 8),      // adjust ESP by IEV2.Vint
    ESCmark2    = (8 << 8),      // mark eh stack
    ESCrelease2 = (9 << 8),      // release eh stack
    ESCframeptr = (10 << 8),     // replace with load of frame pointer
    ESCdctor    = (11 << 8),     // D object is constructed
    ESCddtor    = (12 << 8),     // D object is destructed
    ESCadjfpu   = (13 << 8),     // adjust fpustackused by IEV2.Vint
    ESCfixesp   = (14 << 8),     // reset ESP to end of local frame
}

/*********************************
 * Macros to ease generating code
 * modregrm:    generate mod reg r/m field
 * modregxrm:   reg could be R8..R15
 * modregrmx:   rm could be R8..R15
 * modregxrmx:  reg or rm could be R8..R15
 * NEWREXR:     set the REX_R bit of x from bit 3 of r
 * NEWREG:      change reg field of x to r
 * code_newreg: change the reg field of c.Irm and REX_R of c.Irex to r
 * genorreg:    OR  t,f
 *
 * The "x" variants return a uint in which bit 3 of reg lands on bit 18
 * and bit 3 of rm lands on bit 16, above the modregrm byte in bits 0..7.
 */

ubyte modregrm  (uint m, uint r, uint rm) { return cast(ubyte)((m << 6) | (r << 3) | rm); }
uint  modregxrm (uint m, uint r, uint rm) { return ((r&8)<<15)|modregrm(m,r&7,rm); }
uint  modregrmx (uint m, uint r, uint rm) { return ((rm&8)<<13)|modregrm(m,r,rm&7); }
uint  modregxrmx(uint m, uint r, uint rm) { return ((r&8)<<15)|((rm&8)<<13)|modregrm(m,r&7,rm&7); }

void NEWREXR(ref ubyte x, uint r) { x = (x&~REX_R)|((r&8)>>1); }
void NEWREG (ref ubyte x, uint r) { x = cast(ubyte)((x & ~(7 << 3)) | (r << 3)); }
void code_newreg(code* c, uint r) { NEWREG(c.Irm,r&7); NEWREXR(c.Irex,r); }

//#define genorreg(c,t,f)         genregs((c),0x09,(f),(t))

enum
{
    REX     = 0x40,     // REX prefix byte, OR'd with the following bits:
    REX_W   = 8,        // 0 = default operand size, 1 = 64 bit operand size
    REX_R   = 4,        // high bit of reg field of modregrm
    REX_X   = 2,        // high bit of sib index reg
    REX_B   = 1,        // high bit of rm field, sib base reg, or opcode reg
}

/* Pack fields of ivex as: r in bit 7, vvvv in bits 6..3, l in bit 2, pp in bits 1..0.
 * Presumably the byte following the prefix byte of a 2 byte VEX prefix - confirm.
 */
uint VEX2_B1(code.Svex ivex)
{
    return
        ivex.r    << 7 |
        ivex.vvvv << 3 |
        ivex.l    << 2 |
        ivex.pp;
}

/* Pack fields of ivex as: r in bit 7, x in bit 6, b in bit 5, mmmm in bits 4..0.
 * Presumably the first byte following the prefix byte of a 3 byte VEX prefix - confirm.
 */
uint VEX3_B1(code.Svex ivex)
{
    return
        ivex.r    << 7 |
        ivex.x    << 6 |
        ivex.b    << 5 |
        ivex.mmmm;
}

/* Pack fields of ivex as: w in bit 7, vvvv in bits 6..3, l in bit 2, pp in bits 1..0.
 * Presumably the second byte following the prefix byte of a 3 byte VEX prefix - confirm.
 */
uint VEX3_B2(code.Svex ivex)
{
    return
        ivex.w    << 7 |
        ivex.vvvv << 3 |
        ivex.l    << 2 |
        ivex.pp;
}

/* True if config.target_cpu is TARGET_80286 or lower */
bool ADDFWAIT() { return config.target_cpu <= TARGET_80286; }

/************************************
 */

extern (C++):

struct NDP
{
    elem *e;            // which elem is stored here (NULL if none)
    uint offset;        // offset from e (used for complex numbers)
}

struct Globals87
{
    NDP[8] stack;       // 8087 stack
    int stackused = 0;  // number of items on the 8087 stack

    Barray!NDP save;    // 8087 values spilled to memory
}

extern (C++) extern __gshared Globals87 global87;

void getlvalue_msw(code *);
void getlvalue_lsw(code *);
void getlvalue(ref CodeBuilder cdb, code *pcs, elem *e, regm_t keepmsk);
void loadea(ref CodeBuilder cdb, elem *e, code *cs, uint op, uint reg, targ_size_t offset, regm_t keepmsk, regm_t desmsk);