1 /**
2  * Constants and data structures specific to the x86 platform.
3  *
4  * Copyright:   Copyright (C) 1985-1998 by Symantec
5  *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
6  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
7  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
8  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/code_x86.d, backend/code_x86.d)
9  * Documentation:  https://dlang.org/phobos/dmd_backend_code_x86.html
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/code_x86.d
11  */
12 
13 module dmd.backend.code_x86;
14 
15 // Online documentation: https://dlang.org/phobos/dmd_backend_code_x86.html
16 
17 import dmd.backend.cdef;
18 import dmd.backend.cc : config;
19 import dmd.backend.code;
20 import dmd.backend.codebuilder : CodeBuilder;
21 import dmd.backend.el : elem;
22 import dmd.backend.ty : I64;
23 import dmd.backend.barray;
24 
25 nothrow:
26 
27 alias opcode_t = uint;          // CPU opcode
28 enum opcode_t NoOpcode = 0xFFFF;              // not a valid opcode_t
29 
30 /* Register definitions */
31 
32 enum
33 {
34     AX      = 0,
35     CX      = 1,
36     DX      = 2,
37     BX      = 3,
38     SP      = 4,
39     BP      = 5,
40     SI      = 6,
41     DI      = 7,
42 
43     // #defining R12-R15 interfere with setjmps' _JUMP_BUFFER members
44 
45     R8       = 8,
46     R9       = 9,
47     R10      = 10,
48     R11      = 11,
49     R12      = 12,
50     R13      = 13,
51     R14      = 14,
52     R15      = 15,
53 
54     XMM0    = 16,
55     XMM1    = 17,
56     XMM2    = 18,
57     XMM3    = 19,
58     XMM4    = 20,
59     XMM5    = 21,
60     XMM6    = 22,
61     XMM7    = 23,
62 /* There are also XMM8..XMM14 */
63     XMM15   = 31,
64 }
65 
66 bool isXMMreg(reg_t reg) pure { return reg >= XMM0 && reg <= XMM15; }
67 
68 enum PICREG = BX;
69 
70 enum ES     = 24;
71 
72 enum NUMGENREGS = 16;
73 
74 // fishy naming as it covers XMM7 but not XMM15
75 // currently only used as a replacement for mES in cgcod.c
76 enum NUMREGS = 25;
77 
78 enum PSW     = 25;
79 enum STACK   = 26;      // top of stack
80 enum ST0     = 27;      // 8087 top of stack register
81 enum ST01    = 28;      // top two 8087 registers; for complex types
82 
83 enum reg_t NOREG   = 29;     // no register
84 
85 enum
86 {
87     AL      = 0,
88     CL      = 1,
89     DL      = 2,
90     BL      = 3,
91     AH      = 4,
92     CH      = 5,
93     DH      = 6,
94     BH      = 7,
95 }
96 
97 enum
98 {
99     mAX     = 1,
100     mCX     = 2,
101     mDX     = 4,
102     mBX     = 8,
103     mSP     = 0x10,
104     mBP     = 0x20,
105     mSI     = 0x40,
106     mDI     = 0x80,
107 
108     mR8     = (1 << R8),
109     mR9     = (1 << R9),
110     mR10    = (1 << R10),
111     mR11    = (1 << R11),
112     mR12    = (1 << R12),
113     mR13    = (1 << R13),
114     mR14    = (1 << R14),
115     mR15    = (1 << R15),
116 
117     mXMM0   = (1 << XMM0),
118     mXMM1   = (1 << XMM1),
119     mXMM2   = (1 << XMM2),
120     mXMM3   = (1 << XMM3),
121     mXMM4   = (1 << XMM4),
122     mXMM5   = (1 << XMM5),
123     mXMM6   = (1 << XMM6),
124     mXMM7   = (1 << XMM7),
125     XMMREGS = (mXMM0 |mXMM1 |mXMM2 |mXMM3 |mXMM4 |mXMM5 |mXMM6 |mXMM7),
126 
127     mES     = (1 << ES),      // 0x1000000
128     mPSW    = (1 << PSW),     // 0x2000000
129 
130     mSTACK  = (1 << STACK),   // 0x4000000
131 
132     mST0    = (1 << ST0),     // 0x20000000
133     mST01   = (1 << ST01),    // 0x40000000
134 }
135 
136 // Flags for getlvalue (must fit in regm_t)
137 enum RMload  = (1 << 30);
138 enum RMstore = (1 << 31);
139 
140 extern (C++) extern __gshared regm_t ALLREGS;
141 extern (C++) extern __gshared regm_t BYTEREGS;
142 
143 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
144 {
145     // To support positional independent code,
146     // must be able to remove BX from available registers
147     enum ALLREGS_INIT          = (mAX|mBX|mCX|mDX|mSI|mDI);
148     enum ALLREGS_INIT_PIC      = (mAX|mCX|mDX|mSI|mDI);
149     enum BYTEREGS_INIT         = (mAX|mBX|mCX|mDX);
150     enum BYTEREGS_INIT_PIC     = (mAX|mCX|mDX);
151 }
152 else
153 {
154     enum ALLREGS_INIT          = (mAX|mBX|mCX|mDX|mSI|mDI);
155     enum BYTEREGS_INIT         = (mAX|mBX|mCX|mDX);
156 }
157 
158 
159 /* We use the same IDXREGS for the 386 as the 8088, because if
160    we used ALLREGS, it would interfere with mMSW
161  */
162 enum IDXREGS         = (mBX|mSI|mDI);
163 
164 enum FLOATREGS_64    = mAX;
165 enum FLOATREGS2_64   = mDX;
166 enum DOUBLEREGS_64   = mAX;
167 enum DOUBLEREGS2_64  = mDX;
168 
169 enum FLOATREGS_32    = mAX;
170 enum FLOATREGS2_32   = mDX;
171 enum DOUBLEREGS_32   = (mAX|mDX);
172 enum DOUBLEREGS2_32  = (mCX|mBX);
173 
174 enum FLOATREGS_16    = (mDX|mAX);
175 enum FLOATREGS2_16   = (mCX|mBX);
176 enum DOUBLEREGS_16   = (mAX|mBX|mCX|mDX);
177 
178 /*#define _8087REGS (mST0|mST1|mST2|mST3|mST4|mST5|mST6|mST7)*/
179 
180 /* Segment registers    */
181 enum
182 {
183     SEG_ES  = 0,
184     SEG_CS  = 1,
185     SEG_SS  = 2,
186     SEG_DS  = 3,
187 }
188 
189 /*********************
190  * Masks for register pairs.
191  * Note that index registers are always LSWs. This is for the convenience
192  * of implementing far pointers.
193  */
194 
195 static if (0)
196 {
197 // Give us an extra one so we can enregister a long
198 enum mMSW = mCX|mDX|mDI|mES;       // most significant regs
199 enum mLSW = mAX|mBX|mSI;           // least significant regs
200 }
201 else
202 {
203 enum mMSW = mCX|mDX|mES;           // most significant regs
204 enum mLSW = mAX|mBX|mSI|mDI;       // least significant regs
205 }
206 
207 /* Return !=0 if there is a SIB byte   */
208 uint issib(uint rm) { return (rm & 7) == 4 && (rm & 0xC0) != 0xC0; }
209 
210 static if (0)
211 {
212 // relocation field size is always 32bits
213 //enum is32bitaddr(x,Iflags) (1)
214 }
215 else
216 {
217 //
218 // is32bitaddr works correctly only when x is 0 or 1.  This is
219 // true today for the current definition of I32, but if the definition
220 // of I32 changes, this macro will need to change as well
221 //
222 // Note: even for linux targets, CFaddrsize can be set by the inline
223 // assembler.
224 bool is32bitaddr(bool x,code_flags_t Iflags) { return I64 || (x ^ ((Iflags & CFaddrsize) !=0)); }
225 }
226 
227 
228 /**********************
229  * C library routines.
230  * See callclib().
231  */
232 
233 enum CLIB
234 {
235     lcmp,
236     lmul,
237     ldiv,
238     lmod,
239     uldiv,
240     ulmod,
241 
242     dmul,ddiv,dtst0,dtst0exc,dcmp,dcmpexc,dneg,dadd,dsub,
243     fmul,fdiv,ftst0,ftst0exc,fcmp,fcmpexc,fneg,fadd,fsub,
244 
245     dbllng,lngdbl,dblint,intdbl,
246     dbluns,unsdbl,
247     dblulng,
248     ulngdbl,
249     dblflt,fltdbl,
250     dblllng,
251     llngdbl,
252     dblullng,
253     ullngdbl,
254     dtst,
255     vptrfptr,cvptrfptr,
256 
257     _87topsw,fltto87,dblto87,dblint87,dbllng87,
258     ftst,
259     fcompp,
260     ftest,
261     ftest0,
262     fdiv87,
263 
264     // Complex numbers
265     cmul,
266     cdiv,
267     ccmp,
268 
269     u64_ldbl,
270     ld_u64,
271     MAX
272 }
273 
274 alias code_flags_t = uint;
275 enum
276 {
277     CFes        =        1,     // generate an ES: segment override for this instr
278     CFjmp16     =        2,     // need 16 bit jump offset (long branch)
279     CFtarg      =        4,     // this code is the target of a jump
280     CFseg       =        8,     // get segment of immediate value
281     CFoff       =     0x10,     // get offset of immediate value
282     CFss        =     0x20,     // generate an SS: segment override (not with
283                                 // CFes at the same time, though!)
284     CFpsw       =     0x40,     // we need the flags result after this instruction
285     CFopsize    =     0x80,     // prefix with operand size
286     CFaddrsize  =    0x100,     // prefix with address size
287     CFds        =    0x200,     // need DS override (not with ES, SS, or CS )
288     CFcs        =    0x400,     // need CS override
289     CFfs        =    0x800,     // need FS override
290     CFgs        =   CFcs | CFfs,   // need GS override
291     CFwait      =   0x1000,     // If I32 it indicates when to output a WAIT
292     CFselfrel   =   0x2000,     // if self-relative
293     CFunambig   =   0x4000,     // indicates cannot be accessed by other addressing
294                                 // modes
295     CFtarg2     =   0x8000,     // like CFtarg, but we can't optimize this away
296     CFvolatile  =  0x10000,     // volatile reference, do not schedule
297     CFclassinit =  0x20000,     // class init code
298     CFoffset64  =  0x40000,     // offset is 64 bits
299     CFpc32      =  0x80000,     // I64: PC relative 32 bit fixup
300 
301     CFvex       =  0x100000,    // vex prefix
302     CFvex3      =  0x200000,    // 3 byte vex prefix
303 
304     CFjmp5      =  0x400000,    // always a 5 byte jmp
305     CFswitch    =  0x800000,    // kludge for switch table fixups
306 
307     CFindirect  = 0x1000000,    // OSX32: indirect fixups
308 
309     /* These are for CFpc32 fixups, they're the negative of the offset of the fixup
310      * from the program counter
311      */
312     CFREL       = 0x7000000,
313 
314     CFSEG       = CFes | CFss | CFds | CFcs | CFfs | CFgs,
315     CFPREFIX    = CFSEG | CFopsize | CFaddrsize,
316 }
317 
318 struct code
319 {
320     code *next;
321     code_flags_t Iflags;
322 
323     union
324     {
325         opcode_t Iop;
326         struct Svex
327         {
328           nothrow:
329           align(1):
330             ubyte  op;
331 
332             // [R X B m-mmmm]  [W vvvv L pp]
333             ushort _pp;
334 
335             @property ushort pp() const { return _pp & 3; }
336             @property void pp(ushort v) { _pp = (_pp & ~3) | (v & 3); }
337 
338             @property ushort l() const { return (_pp >> 2) & 1; }
339             @property void l(ushort v) { _pp = cast(ushort)((_pp & ~4) | ((v & 1) << 2)); }
340 
341             @property ushort vvvv() const { return (_pp >> 3) & 0x0F; }
342             @property void vvvv(ushort v) { _pp = cast(ushort)((_pp & ~0x78) | ((v & 0x0F) << 3)); }
343 
344             @property ushort w() const { return (_pp >> 7) & 1; }
345             @property void w(ushort v) { _pp = cast(ushort)((_pp & ~0x80) | ((v & 1) << 7)); }
346 
347             @property ushort mmmm() const { return (_pp >> 8) & 0x1F; }
348             @property void mmmm(ushort v) { _pp = cast(ushort)((_pp & ~0x1F00) | ((v & 0x1F) << 8)); }
349 
350             @property ushort b() const { return (_pp >> 13) & 1; }
351             @property void b(ushort v) { _pp = cast(ushort)((_pp & ~0x2000) | ((v & 1) << 13)); }
352 
353             @property ushort x() const { return (_pp >> 14) & 1; }
354             @property void x(ushort v) { _pp = cast(ushort)((_pp & ~0x4000) | ((v & 1) << 14)); }
355 
356             @property ushort r() const { return (_pp >> 15) & 1; }
357             @property void r(ushort v) { _pp = cast(ushort)((_pp & ~0x8000) | (v << 15)); }
358 
359             ubyte pfx; // always 0xC4
360         }
361         Svex Ivex;
362     }
363 
364     /* The _EA is the "effective address" for the instruction, and consists of the modregrm byte,
365      * the sib byte, and the REX prefix byte. The 16 bit code generator just used the modregrm,
366      * the 32 bit x86 added the sib, and the 64 bit one added the rex.
367      */
368     union
369     {
370         uint Iea;
371         struct
372         {
373             ubyte Irm;          // reg/mode
374             ubyte Isib;         // SIB byte
375             ubyte Irex;         // REX prefix
376         }
377     }
378 
379     /* IFL1 and IEV1 are the first operand, which usually winds up being the offset to the Effective
380      * Address. IFL1 is the tag saying which variant type is in IEV1. IFL2 and IEV2 is the second
381      * operand, usually for immediate instructions.
382      */
383 
384     ubyte IFL1,IFL2;    // FLavors of 1st, 2nd operands
385     evc IEV1;             // 1st operand, if any
386     evc IEV2;             // 2nd operand, if any
387 
388   nothrow:
389     void orReg(uint reg)
390     {   if (reg & 8)
391             Irex |= REX_R;
392         Irm |= modregrm(0, reg & 7, 0);
393     }
394 
395     void setReg(uint reg)
396     {
397         Irex &= ~REX_R;
398         Irm &= cast(ubyte)~cast(uint)modregrm(0, 7, 0);
399         orReg(reg);
400     }
401 
402     bool isJumpOP() { return Iop == JMP || Iop == JMPS; }
403 
404     extern (C++) void print()               // pretty-printer
405     {
406         code_print(&this);
407     }
408 }
409 
410 extern (C) void code_print(code*);
411 
412 /*******************
413  * Some instructions.
414  */
415 
416 enum
417 {
418     SEGES   = 0x26,
419     SEGCS   = 0x2E,
420     SEGSS   = 0x36,
421     SEGDS   = 0x3E,
422     SEGFS   = 0x64,
423     SEGGS   = 0x65,
424 
425     CMP     = 0x3B,
426     CALL    = 0xE8,
427     JMP     = 0xE9,    // Intra-Segment Direct
428     JMPS    = 0xEB,    // JMP SHORT
429     JCXZ    = 0xE3,
430     LOOP    = 0xE2,
431     LES     = 0xC4,
432     LEA     = 0x8D,
433     LOCK    = 0xF0,
434     INT3    = 0xCC,
435     HLT     = 0xF4,
436     ENTER   = 0xC8,
437     LEAVE   = 0xC9,
438     MOVSXb  = 0x0FBE,
439     MOVSXw  = 0x0FBF,
440     MOVZXb  = 0x0FB6,
441     MOVZXw  = 0x0FB7,
442 
443     STOSB   = 0xAA,
444     STOS    = 0xAB,
445 
446     STO     = 0x89,
447     LOD     = 0x8B,
448 
449     JO      = 0x70,
450     JNO     = 0x71,
451     JC      = 0x72,
452     JB      = 0x72,
453     JNC     = 0x73,
454     JAE     = 0x73,
455     JE      = 0x74,
456     JNE     = 0x75,
457     JBE     = 0x76,
458     JA      = 0x77,
459     JS      = 0x78,
460     JNS     = 0x79,
461     JP      = 0x7A,
462     JNP     = 0x7B,
463     JL      = 0x7C,
464     JGE     = 0x7D,
465     JLE     = 0x7E,
466     JG      = 0x7F,
467 
468     UD2     = 0x0F0B,
469     PAUSE   = 0xF390,  // aka REP NOP
470 
471     // NOP is used as a placeholder in the linked list of instructions, no
472     // actual code will be generated for it.
473     NOP     = SEGCS,   // don't use 0x90 because the
474                        // Windows stuff wants to output 0x90's
475 
476     ASM     = SEGSS,   // string of asm bytes
477 
478     ESCAPE  = SEGDS,   // marker that special information is here
479                        // (Iop2 is the type of special information)
480 }
481 
482 
483 enum ESCAPEmask = 0xFF; // code.Iop & ESCAPEmask ==> actual Iop
484 
485 enum
486 {
487     ESClinnum   = (1 << 8),      // line number information
488     ESCctor     = (2 << 8),      // object is constructed
489     ESCdtor     = (3 << 8),      // object is destructed
490     ESCmark     = (4 << 8),      // mark eh stack
491     ESCrelease  = (5 << 8),      // release eh stack
492     ESCoffset   = (6 << 8),      // set code offset for eh
493     ESCadjesp   = (7 << 8),      // adjust ESP by IEV2.Vint
494     ESCmark2    = (8 << 8),      // mark eh stack
495     ESCrelease2 = (9 << 8),      // release eh stack
496     ESCframeptr = (10 << 8),     // replace with load of frame pointer
497     ESCdctor    = (11 << 8),     // D object is constructed
498     ESCddtor    = (12 << 8),     // D object is destructed
499     ESCadjfpu   = (13 << 8),     // adjust fpustackused by IEV2.Vint
500     ESCfixesp   = (14 << 8),     // reset ESP to end of local frame
501 }
502 
503 /*********************************
504  * Macros to ease generating code
505  * modregrm:    generate mod reg r/m field
506  * modregxrm:   reg could be R8..R15
507  * modregrmx:   rm could be R8..R15
508  * modregxrmx:  reg or rm could be R8..R15
509  * NEWREG:      change reg field of x to r
510  * genorreg:    OR  t,f
511  */
512 
513 ubyte modregrm (uint m, uint r, uint rm) { return cast(ubyte)((m << 6) | (r << 3) | rm); }
514 uint modregxrm (uint m, uint r, uint rm) { return ((r&8)<<15)|modregrm(m,r&7,rm); }
515 uint modregrmx (uint m, uint r, uint rm) { return ((rm&8)<<13)|modregrm(m,r,rm&7); }
516 uint modregxrmx(uint m, uint r, uint rm) { return ((r&8)<<15)|((rm&8)<<13)|modregrm(m,r&7,rm&7); }
517 
518 void NEWREXR(ref ubyte x, uint r)  { x = (x&~REX_R)|((r&8)>>1); }
519 void NEWREG (ref ubyte x, uint r)  { x = cast(ubyte)((x & ~(7 << 3)) | (r << 3)); }
520 void code_newreg(code* c, uint r)  { NEWREG(c.Irm,r&7); NEWREXR(c.Irex,r); }
521 
522 //#define genorreg(c,t,f)         genregs((c),0x09,(f),(t))
523 
524 enum
525 {
526     REX     = 0x40,        // REX prefix byte, OR'd with the following bits:
527     REX_W   = 8,           // 0 = default operand size, 1 = 64 bit operand size
528     REX_R   = 4,           // high bit of reg field of modregrm
529     REX_X   = 2,           // high bit of sib index reg
530     REX_B   = 1,           // high bit of rm field, sib base reg, or opcode reg
531 }
532 
533 uint VEX2_B1(code.Svex ivex)
534 {
535     return
536         ivex.r    << 7 |
537         ivex.vvvv << 3 |
538         ivex.l    << 2 |
539         ivex.pp;
540 }
541 
542 uint VEX3_B1(code.Svex ivex)
543 {
544     return
545         ivex.r    << 7 |
546         ivex.x    << 6 |
547         ivex.b    << 5 |
548         ivex.mmmm;
549 }
550 
551 uint VEX3_B2(code.Svex ivex)
552 {
553     return
554         ivex.w    << 7 |
555         ivex.vvvv << 3 |
556         ivex.l    << 2 |
557         ivex.pp;
558 }
559 
560 bool ADDFWAIT() { return config.target_cpu <= TARGET_80286; }
561 
562 /************************************
563  */
564 
565 extern (C++):
566 
567 struct NDP
568 {
569     elem *e;                    // which elem is stored here (NULL if none)
570     uint offset;            // offset from e (used for complex numbers)
571 }
572 
573 struct Globals87
574 {
575     NDP[8] stack;              // 8087 stack
576     int stackused = 0;         // number of items on the 8087 stack
577 
578     Barray!NDP save;           // 8087 values spilled to memory
579 }
580 
581 extern (C++) extern __gshared Globals87 global87;
582 
583 void getlvalue_msw(code *);
584 void getlvalue_lsw(code *);
585 void getlvalue(ref CodeBuilder cdb, code *pcs, elem *e, regm_t keepmsk);
586 void loadea(ref CodeBuilder cdb, elem *e, code *cs, uint op, uint reg, targ_size_t offset, regm_t keepmsk, regm_t desmsk);
587