1 /**
2  * Constants and data structures specific to the x86 platform.
3  *
4  * Copyright:   Copyright (C) 1985-1998 by Symantec
5  *              Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved
6  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
7  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/code_x86.d, backend/code_x86.d)
9  * Documentation:  https://dlang.org/phobos/dmd_backend_code_x86.html
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/code_x86.d
11  */
12 
13 module dmd.backend.code_x86;
14 
15 // Online documentation: https://dlang.org/phobos/dmd_backend_code_x86.html
16 
17 import dmd.backend.cdef;
18 import dmd.backend.cc : config;
19 import dmd.backend.code;
20 import dmd.backend.codebuilder : CodeBuilder;
21 import dmd.backend.el : elem;
22 import dmd.backend.ty : I64;
23 import dmd.backend.barray;
24 
25 nothrow:
26 
/// Integral type used to hold a CPU opcode.
alias opcode_t = uint;

/// Sentinel value that is not a valid opcode_t.
enum opcode_t NoOpcode = 0xFFFF;
29 
/* Register numbers.
 * 0..7 are the classic registers, 8..15 are R8..R15, and the XMM registers
 * start at 16.
 */
enum
{
    AX = 0,  CX = 1,  DX = 2,  BX = 3,
    SP = 4,  BP = 5,  SI = 6,  DI = 7,

    // Historical note: #defining R12-R15 interfered with setjmp's _JUMP_BUFFER members

    R8  = 8,   R9  = 9,   R10 = 10,  R11 = 11,
    R12 = 12,  R13 = 13,  R14 = 14,  R15 = 15,

    XMM0 = 16,  XMM1 = 17,  XMM2 = 18,  XMM3 = 19,
    XMM4 = 20,  XMM5 = 21,  XMM6 = 22,  XMM7 = 23,

    // XMM8..XMM14 exist as well, but are not given names here
    XMM15 = 31,
}
65 
/**
 * Tell whether a register number lies in the XMM range.
 * Params:
 *      reg = register number to classify
 * Returns:
 *      true if XMM0 <= reg <= XMM15
 * NOTE(review): the numeric range XMM0..XMM15 (16..31) also contains the values
 * this file assigns to ES, PSW, STACK, ST0, ST01 and NOREG - confirm callers
 * never pass those.
 */
bool isXMMreg(reg_t reg) pure
{
    if (reg < XMM0)
        return false;
    return reg <= XMM15;
}
67 
enum PICREG = BX;           // presumably the register set aside for PIC (going by the name)

// Register numbers following XMM7 (23)
enum ES    = 24;
enum PSW   = 25;
enum STACK = 26;            // top of stack
enum ST0   = 27;            // 8087 top of stack register
enum ST01  = 28;            // top two 8087 registers; for complex types

enum reg_t NOREG = 29;      // no register

enum NUMGENREGS = 16;

// The name is fishy: the count covers XMM7 but not XMM15.
// Currently only used as a replacement for mES in cgcod.c
enum NUMREGS = 25;
84 
/* Byte register numbers: AL..BL are 0..3, AH..BH are 4..7 */
enum
{
    AL = 0,  CL = 1,  DL = 2,  BL = 3,
    AH = 4,  CH = 5,  DH = 6,  BH = 7,
}
96 
/* Register masks: each mXXX is the single bit (1 << XXX) for register number XXX */
enum
{
    mAX     = 1 << AX,          // 0x01
    mCX     = 1 << CX,          // 0x02
    mDX     = 1 << DX,          // 0x04
    mBX     = 1 << BX,          // 0x08
    mSP     = 1 << SP,          // 0x10
    mBP     = 1 << BP,          // 0x20
    mSI     = 1 << SI,          // 0x40
    mDI     = 1 << DI,          // 0x80

    mR8     = 1 << R8,
    mR9     = 1 << R9,
    mR10    = 1 << R10,
    mR11    = 1 << R11,
    mR12    = 1 << R12,
    mR13    = 1 << R13,
    mR14    = 1 << R14,
    mR15    = 1 << R15,

    mXMM0   = 1 << XMM0,
    mXMM1   = 1 << XMM1,
    mXMM2   = 1 << XMM2,
    mXMM3   = 1 << XMM3,
    mXMM4   = 1 << XMM4,
    mXMM5   = 1 << XMM5,
    mXMM6   = 1 << XMM6,
    mXMM7   = 1 << XMM7,

    // All the XMM registers that have a mask bit (XMM0..XMM7)
    XMMREGS = mXMM0 | mXMM1 | mXMM2 | mXMM3
            | mXMM4 | mXMM5 | mXMM6 | mXMM7,

    mES     = 1 << ES,          // 0x1000000
    mPSW    = 1 << PSW,         // 0x2000000

    mSTACK  = 1 << STACK,       // 0x4000000

    mST0    = 1 << ST0,         // 0x8000000
    mST01   = 1 << ST01,        // 0x10000000
}
135 
// Flag bits for getlvalue. They must fit in regm_t; they use bits 30 and 31.
enum RMload  = 1 << 30;
enum RMstore = 1 << 31;
139 
// Register sets defined elsewhere; the *_INIT constants below are the values
// available for them with and without BX.
extern (C++) extern __gshared regm_t ALLREGS;
extern (C++) extern __gshared regm_t BYTEREGS;

// To support position independent code, it must be possible to remove BX
// from the available registers - hence the *_PIC variants without mBX.
enum ALLREGS_INIT      = mAX | mBX | mCX | mDX | mSI | mDI;
enum ALLREGS_INIT_PIC  = mAX | mCX | mDX | mSI | mDI;
enum BYTEREGS_INIT     = mAX | mBX | mCX | mDX;
enum BYTEREGS_INIT_PIC = mAX | mCX | mDX;
149 
/* The 386 uses the same IDXREGS as the 8088: using ALLREGS instead
 * would interfere with mMSW.
 */
enum IDXREGS        = mBX | mSI | mDI;

// Register sets for floats and doubles, one group per code size (_64, _32, _16)
enum FLOATREGS_64   = mAX;
enum FLOATREGS2_64  = mDX;
enum DOUBLEREGS_64  = mAX;
enum DOUBLEREGS2_64 = mDX;

enum FLOATREGS_32   = mAX;
enum FLOATREGS2_32  = mDX;
enum DOUBLEREGS_32  = mAX | mDX;
enum DOUBLEREGS2_32 = mCX | mBX;

enum FLOATREGS_16   = mDX | mAX;
enum FLOATREGS2_16  = mCX | mBX;
enum DOUBLEREGS_16  = mAX | mBX | mCX | mDX;
168 
169 /*#define _8087REGS (mST0|mST1|mST2|mST3|mST4|mST5|mST6|mST7)*/
170 
/* Segment register numbers */
enum
{
    SEG_ES = 0,  SEG_CS = 1,  SEG_SS = 2,  SEG_DS = 3,
}
179 
/*********************
 * Masks for register pairs.
 * Index registers are always in the least significant word (LSW) set;
 * this is for the convenience of implementing far pointers.
 */

static if (0)
{
    // Disabled variant: moves mDI to the MSW set, giving an extra
    // MSW register so a long can be enregistered
    enum mMSW = mCX | mDX | mDI | mES;      // most significant regs
    enum mLSW = mAX | mBX | mSI;            // least significant regs
}
else
{
    enum mMSW = mCX | mDX | mES;            // most significant regs
    enum mLSW = mAX | mBX | mSI | mDI;      // least significant regs
}
197 
/**
 * Tell whether a mod/rm byte is followed by a SIB byte.
 * Params:
 *      rm = mod/rm byte
 * Returns:
 *      1 if the low three bits are 4 and the top two bits are not both set,
 *      0 otherwise
 */
uint issib(uint rm)
{
    const bool rmIsFour = (rm & 7) == 4;
    const bool modIsThree = (rm & 0xC0) == 0xC0;
    return rmIsFour && !modIsThree;
}
200 
// Historical alternative, disabled: treat the relocation field size as always
// 32 bits, i.e. is32bitaddr(x,Iflags) is always 1.

/**
 * Tell whether the address size is 32 bits.
 *
 * Even for linux targets, CFaddrsize can be set by the inline assembler.
 *
 * Params:
 *      x = NOTE(review): presumably the I32 setting, going by the original
 *          comment that tied this to the definition of I32 - confirm with callers
 *      Iflags = instruction flags; only CFaddrsize is examined
 * Returns:
 *      true if I64 is set, otherwise true exactly when x differs from
 *      "CFaddrsize is set in Iflags"
 */
bool is32bitaddr(bool x, code_flags_t Iflags)
{
    const bool addrSizeFlag = (Iflags & CFaddrsize) != 0;
    return I64 || (x != addrSizeFlag);
}
217 
218 
/**********************
 * C library routines.
 * See callclib().
 * The members are numbered consecutively from 0, so their order matters;
 * MAX is last and therefore equals the number of routines.
 */

enum CLIB
{
    lcmp, lmul, ldiv, lmod,
    uldiv, ulmod,

    dmul, ddiv, dtst0, dtst0exc, dcmp, dcmpexc,
    dneg, dadd, dsub,

    fmul, fdiv, ftst0, ftst0exc, fcmp, fcmpexc,
    fneg, fadd, fsub,

    dbllng, lngdbl,
    dblint, intdbl,
    dbluns, unsdbl,
    dblulng, ulngdbl,
    dblflt, fltdbl,
    dblllng, llngdbl,
    dblullng, ullngdbl,
    dtst,
    vptrfptr, cvptrfptr,

    _87topsw, fltto87, dblto87, dblint87, dbllng87,
    ftst, fcompp, ftest, ftest0, fdiv87,

    // Complex numbers
    cmul, cdiv, ccmp,

    u64_ldbl, ld_u64,

    MAX
}
264 
/// Bit set of the CFxxx flags below, as stored in code.Iflags
alias code_flags_t = uint;
enum
{
    CFes        = 1 << 0,       // generate an ES: segment override for this instr
    CFjmp16     = 1 << 1,       // need 16 bit jump offset (long branch)
    CFtarg      = 1 << 2,       // this code is the target of a jump
    CFseg       = 1 << 3,       // get segment of immediate value
    CFoff       = 1 << 4,       // get offset of immediate value
    CFss        = 1 << 5,       // generate an SS: segment override
                                // (never together with CFes)
    CFpsw       = 1 << 6,       // the flags result is needed after this instruction
    CFopsize    = 1 << 7,       // prefix with operand size
    CFaddrsize  = 1 << 8,       // prefix with address size
    CFds        = 1 << 9,       // need DS override (not with ES, SS, or CS)
    CFcs        = 1 << 10,      // need CS override
    CFfs        = 1 << 11,      // need FS override
    CFgs        = CFcs | CFfs,  // need GS override (both the CS and FS bits)
    CFwait      = 1 << 12,      // if I32, indicates when to output a WAIT
    CFselfrel   = 1 << 13,      // if self-relative
    CFunambig   = 1 << 14,      // cannot be accessed by other addressing modes
    CFtarg2     = 1 << 15,      // like CFtarg, but must not be optimized away
    CFvolatile  = 1 << 16,      // volatile reference, do not schedule
    CFclassinit = 1 << 17,      // class init code
    CFoffset64  = 1 << 18,      // offset is 64 bits
    CFpc32      = 1 << 19,      // I64: PC relative 32 bit fixup

    CFvex       = 1 << 20,      // vex prefix
    CFvex3      = 1 << 21,      // 3 byte vex prefix

    CFjmp5      = 1 << 22,      // always a 5 byte jmp
    CFswitch    = 1 << 23,      // kludge for switch table fixups

    CFindirect  = 1 << 24,      // OSX32: indirect fixups

    /* For CFpc32 fixups: the negative of the offset of the fixup
     * from the program counter (three bit field at bits 24..26)
     */
    CFREL       = 7 << 24,

    // Every segment override flag
    CFSEG       = CFes | CFss | CFds | CFcs | CFfs | CFgs,
    // Every flag that produces an instruction prefix
    CFPREFIX    = CFSEG | CFopsize | CFaddrsize,
}
308 
/**
 * One entry in the linked list of generated instructions.
 * It carries the CFxxx flags, the opcode (or its VEX view), the effective
 * address bytes and up to two operands.
 */
struct code
{
    code* next;                 // following entry in the list
    code_flags_t Iflags;        // CFxxx flags

    union
    {
        opcode_t Iop;           // the opcode as one integer

        // The same storage, viewed as the pieces of a VEX encoded instruction
        struct Svex
        {
          nothrow:
          align(1):
            ubyte  op;

            // Bit fields packed into _pp, most significant bit first:
            // [R X B m-mmmm]  [W vvvv L pp]
            ushort _pp;

            ubyte pfx; // always 0xC4

            // pp: bits 0..1
            @property ushort pp() const
            {
                return _pp & 0x3;
            }
            @property void pp(ushort v)
            {
                _pp = (_pp & ~3) | (v & 3);
            }

            // L: bit 2
            @property ushort l() const
            {
                return (_pp >> 2) & 0x1;
            }
            @property void l(ushort v)
            {
                _pp = cast(ushort)((_pp & ~0x4) | ((v & 1) << 2));
            }

            // vvvv: bits 3..6
            @property ushort vvvv() const
            {
                return (_pp >> 3) & 0xF;
            }
            @property void vvvv(ushort v)
            {
                _pp = cast(ushort)((_pp & ~0x78) | ((v & 0xF) << 3));
            }

            // W: bit 7
            @property ushort w() const
            {
                return (_pp >> 7) & 0x1;
            }
            @property void w(ushort v)
            {
                _pp = cast(ushort)((_pp & ~0x80) | ((v & 1) << 7));
            }

            // m-mmmm: bits 8..12
            @property ushort mmmm() const
            {
                return (_pp >> 8) & 0x1F;
            }
            @property void mmmm(ushort v)
            {
                _pp = cast(ushort)((_pp & ~0x1F00) | ((v & 0x1F) << 8));
            }

            // B: bit 13
            @property ushort b() const
            {
                return (_pp >> 13) & 0x1;
            }
            @property void b(ushort v)
            {
                _pp = cast(ushort)((_pp & ~0x2000) | ((v & 1) << 13));
            }

            // X: bit 14
            @property ushort x() const
            {
                return (_pp >> 14) & 0x1;
            }
            @property void x(ushort v)
            {
                _pp = cast(ushort)((_pp & ~0x4000) | ((v & 1) << 14));
            }

            // R: bit 15
            @property ushort r() const
            {
                return (_pp >> 15) & 0x1;
            }
            @property void r(ushort v)
            {
                // v is not masked here; the cast to ushort drops everything
                // above bit 15, so only bit 0 of v is kept
                _pp = cast(ushort)((_pp & ~0x8000) | (v << 15));
            }
        }
        Svex Ivex;
    }

    /* The "effective address" of the instruction is made up of the modregrm
     * byte, the sib byte and the REX prefix byte. The 16 bit code generator
     * used only the modregrm, 32 bit x86 added the sib, and 64 bit added the rex.
     */
    union
    {
        uint Iea;               // all of the bytes below as one integer
        struct
        {
            ubyte Irm;          // reg/mode
            ubyte Isib;         // SIB byte
            ubyte Irex;         // REX prefix
        }
    }

    /* IFL1/IEV1 describe the first operand, which usually winds up being the
     * offset to the effective address; IFL1 is the tag saying which variant
     * of IEV1 is in use. IFL2/IEV2 describe the second operand the same way,
     * usually for immediate instructions.
     */

    ubyte IFL1,IFL2;    // FLavors of 1st, 2nd operands
    evc IEV1;           // 1st operand, if any
    evc IEV2;           // 2nd operand, if any

  nothrow:

    /// OR register number `reg` into the reg field of Irm; bit 3 of `reg`
    /// sets REX_R in Irex. Bits already set are left alone.
    void orReg(uint reg)
    {
        const ubyte regField = modregrm(0, reg & 7, 0);
        if (reg & 8)
            Irex |= REX_R;
        Irm |= regField;
    }

    /// Replace the reg field of Irm (and REX_R in Irex) with register number `reg`.
    void setReg(uint reg)
    {
        // clear the old register first, then OR in the new one
        Irex &= ~REX_R;
        Irm &= cast(ubyte)~cast(uint)modregrm(0, 7, 0);
        orReg(reg);
    }

    /// Returns: true if the opcode is JMP or JMPS
    bool isJumpOP()
    {
        const opcode_t op = Iop;
        return op == JMP || op == JMPS;
    }

    /// Pretty-printer; forwards to code_print()
    extern (C++) void print()
    {
        code_print(&this);
    }
}

/// Prints one instruction; defined elsewhere
extern (C) void code_print(code*);
402 
/*******************
 * Some instructions.
 * Values above 0xFF are multi-byte opcodes written as one number.
 */

enum
{
    // Segment override prefixes
    SEGES   = 0x26,
    SEGCS   = 0x2E,
    SEGSS   = 0x36,
    SEGDS   = 0x3E,
    SEGFS   = 0x64,
    SEGGS   = 0x65,

    CMP     = 0x3B,
    CALL    = 0xE8,
    JMP     = 0xE9,     // Intra-Segment Direct
    JMPS    = 0xEB,     // JMP SHORT
    JCXZ    = 0xE3,
    LOOP    = 0xE2,
    LES     = 0xC4,
    LEA     = 0x8D,
    LOCK    = 0xF0,
    INT3    = 0xCC,
    HLT     = 0xF4,
    ENTER   = 0xC8,
    LEAVE   = 0xC9,

    MOVSXb  = 0x0FBE,
    MOVSXw  = 0x0FBF,
    MOVZXb  = 0x0FB6,
    MOVZXw  = 0x0FB7,

    STOSB   = 0xAA,
    STOS    = 0xAB,

    STO     = 0x89,
    LOD     = 0x8B,

    // Conditional jumps 0x70..0x7F; some values have two names
    JO      = 0x70,     JNO     = 0x71,
    JC      = 0x72,     JB      = 0x72,
    JNC     = 0x73,     JAE     = 0x73,
    JE      = 0x74,     JNE     = 0x75,
    JBE     = 0x76,     JA      = 0x77,
    JS      = 0x78,     JNS     = 0x79,
    JP      = 0x7A,     JNP     = 0x7B,
    JL      = 0x7C,     JGE     = 0x7D,
    JLE     = 0x7E,     JG      = 0x7F,

    UD2     = 0x0F0B,
    PAUSE   = 0xF390,   // aka REP NOP

    /* The remaining names reuse segment prefix values for entries in the
     * instruction list that are not ordinary instructions.
     */

    // NOP is a placeholder in the linked list of instructions; no actual
    // code is generated for it. 0x90 is not used because the Windows
    // stuff wants to output 0x90's.
    NOP     = SEGCS,

    ASM     = SEGSS,    // string of asm bytes

    // Marker that special information is here; the original note says
    // "Iop2 is the type of special information".
    // NOTE(review): the ESCxxx values are all multiples of 0x100 and
    // ESCAPEmask is 0xFF, so the type looks to be carried in the upper
    // bits of Iop - confirm.
    ESCAPE  = SEGDS,
}
472 
473 
enum ESCAPEmask = 0xFF; // code.Iop & ESCAPEmask ==> actual Iop

// Kinds of special information; each value is (kind number << 8),
// which leaves the low byte (ESCAPEmask) clear
enum
{
    ESClinnum   = 0x100,        // line number information
    ESCctor     = 0x200,        // object is constructed
    ESCdtor     = 0x300,        // object is destructed
    ESCmark     = 0x400,        // mark eh stack
    ESCrelease  = 0x500,        // release eh stack
    ESCoffset   = 0x600,        // set code offset for eh
    ESCadjesp   = 0x700,        // adjust ESP by IEV2.Vint
    ESCmark2    = 0x800,        // mark eh stack
    ESCrelease2 = 0x900,        // release eh stack
    ESCframeptr = 0xA00,        // replace with load of frame pointer
    ESCdctor    = 0xB00,        // D object is constructed
    ESCddtor    = 0xC00,        // D object is destructed
    ESCadjfpu   = 0xD00,        // adjust fpustackused by IEV2.Vint
    ESCfixesp   = 0xE00,        // reset ESP to end of local frame
}
493 
/*********************************
 * Helper functions (historically macros) to ease generating code
 * modregrm:    generate mod reg r/m field
 * modregxrm:   reg could be R8..R15
 * modregrmx:   rm could be R8..R15
 * modregxrmx:  reg or rm could be R8..R15
 * NEWREG:      change reg field of x to r
 * genorreg:    OR  t,f  (currently commented out)
 */
503 
/**
 * Pack the three fields of a mod/reg/rm byte.
 * Params:
 *      m = mod field, placed at bits 6..7
 *      r = reg field, placed at bits 3..5
 *      rm = r/m field, placed at bits 0..2
 * Returns:
 *      the low 8 bits of (m << 6) | (r << 3) | rm; the arguments are not masked
 */
ubyte modregrm(uint m, uint r, uint rm)
{
    const uint packed = rm | (r << 3) | (m << 6);
    return cast(ubyte) packed;
}

/// Like modregrm, but `r` may be 8..15: bit 3 of `r` is returned at bit 18
/// (the value of REX_R shifted left by 16).
uint modregxrm(uint m, uint r, uint rm)
{
    const uint rexR = (r & 8) << 15;
    return rexR | modregrm(m, r & 7, rm);
}

/// Like modregrm, but `rm` may be 8..15: bit 3 of `rm` is returned at bit 16
/// (the value of REX_B shifted left by 16).
uint modregrmx(uint m, uint r, uint rm)
{
    const uint rexB = (rm & 8) << 13;
    return rexB | modregrm(m, r, rm & 7);
}

/// Like modregrm, but both `r` and `rm` may be 8..15 (see modregxrm, modregrmx).
uint modregxrmx(uint m, uint r, uint rm)
{
    const uint rexR = (r & 8) << 15;
    const uint rexB = (rm & 8) << 13;
    return rexR | rexB | modregrm(m, r & 7, rm & 7);
}
508 
/// Replace the REX_R bit of REX byte `x` with bit 3 of register number `r`.
void NEWREXR(ref ubyte x, uint r)
{
    const uint rexR = (r & 8) >> 1;     // bit 3 of r moved down to the REX_R position
    x = cast(ubyte)((x & ~REX_R) | rexR);
}
/// Replace the reg field (bits 3..5) of mod/rm byte `x` with `r`.
/// `r` is not masked, so callers are expected to pass 0..7.
void NEWREG(ref ubyte x, uint r)
{
    enum uint regField = 7 << 3;
    const uint cleared = x & ~regField;
    x = cast(ubyte)(cleared | (r << 3));
}
/// Change the register in the reg field of instruction `c` to `r` (0..15):
/// the low three bits go into c.Irm, bit 3 goes into REX_R of c.Irex.
void code_newreg(code* c, uint r)
{
    NEWREXR(c.Irex, r);
    NEWREG(c.Irm, r & 7);
}
512 
513 //#define genorreg(c,t,f)         genregs((c),0x09,(f),(t))
514 
// REX prefix byte and the bits OR'd into it
enum
{
    REX     = 0x40,         // REX prefix byte
    REX_W   = 1 << 3,       // 0 = default operand size, 1 = 64 bit operand size
    REX_R   = 1 << 2,       // high bit of reg field of modregrm
    REX_X   = 1 << 1,       // high bit of sib index reg
    REX_B   = 1 << 0,       // high bit of rm field, sib base reg, or opcode reg
}
523 
/**
 * Pack VEX fields into one byte: r at bit 7, vvvv at bits 3..6,
 * l at bit 2, pp at bits 0..1.
 * NOTE(review): going by the name, this is byte 1 of the two byte VEX prefix - confirm.
 */
uint VEX2_B1(code.Svex ivex)
{
    uint result = ivex.pp;
    result |= ivex.l    << 2;
    result |= ivex.vvvv << 3;
    result |= ivex.r    << 7;
    return result;
}
532 
/**
 * Pack VEX fields into one byte: r at bit 7, x at bit 6, b at bit 5,
 * mmmm at bits 0..4.
 * NOTE(review): going by the name, this is byte 1 of the three byte VEX prefix - confirm.
 */
uint VEX3_B1(code.Svex ivex)
{
    uint result = ivex.mmmm;
    result |= ivex.b << 5;
    result |= ivex.x << 6;
    result |= ivex.r << 7;
    return result;
}
541 
/**
 * Pack VEX fields into one byte: w at bit 7, vvvv at bits 3..6,
 * l at bit 2, pp at bits 0..1.
 * NOTE(review): going by the name, this is byte 2 of the three byte VEX prefix - confirm.
 */
uint VEX3_B2(code.Svex ivex)
{
    uint result = ivex.pp;
    result |= ivex.l    << 2;
    result |= ivex.vvvv << 3;
    result |= ivex.w    << 7;
    return result;
}
550 
/// Returns: true if the configured target CPU is TARGET_80286 or lower.
/// NOTE(review): going by the name, callers use this to decide whether an
/// FWAIT has to be emitted - confirm.
bool ADDFWAIT()
{
    const cpu = config.target_cpu;
    return cpu <= TARGET_80286;
}
552 
553 /************************************
554  */
555 
extern (C++):

/// One value held by the 8087 bookkeeping (element type of Globals87.stack and .save)
struct NDP
{
    elem* e;        // which elem is stored here (null if none)
    uint offset;    // offset from e (used for complex numbers)
}
563 
/// Bookkeeping for the 8087 register stack
struct Globals87
{
    NDP[8] stack;           // 8087 stack
    int stackused = 0;      // number of items on the 8087 stack

    Barray!NDP save;        // 8087 values spilled to memory
}
571 
/// The 8087 bookkeeping instance; defined elsewhere
extern (C++) extern __gshared Globals87 global87;

// Code generator entry points implemented elsewhere
void getlvalue_msw(code*);
void getlvalue_lsw(code*);
void getlvalue(ref CodeBuilder cdb, code* pcs, elem* e, regm_t keepmsk);
void loadea(ref CodeBuilder cdb, elem* e, code* cs,
            uint op, uint reg, targ_size_t offset,
            regm_t keepmsk, regm_t desmsk);
578