1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1985-1998 by Symantec
6  *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/code_x86.d, backend/code_x86.d)
10  */
11 
12 module dmd.backend.code_x86;
13 
14 // Online documentation: https://dlang.org/phobos/dmd_backend_code_x86.html
15 
16 import dmd.backend.cdef;
17 import dmd.backend.cc : config;
18 import dmd.backend.code;
19 import dmd.backend.codebuilder : CodeBuilder;
20 import dmd.backend.el : elem;
21 import dmd.backend.ty : I64;
22 import dmd.backend.barray;
23 
24 nothrow:
25 
/// Integer type used throughout this module to hold a CPU opcode.
alias opcode_t = uint;          // CPU opcode
/// Sentinel value meaning "no opcode".
enum opcode_t NoOpcode = 0xFFFF;              // not a valid opcode_t
28 
29 /* Register definitions */
30 
/* Register numbers.
 * The R8..R15 and XMM0..XMM7 masks later in this file are built as
 * (1 << register number).
 */
enum
{
    // General purpose registers 0..7
    AX      = 0,
    CX      = 1,
    DX      = 2,
    BX      = 3,
    SP      = 4,
    BP      = 5,
    SI      = 6,
    DI      = 7,

    // Historical note: #define-ing R12-R15 interferes with setjmp's
    // _JUMP_BUFFER members (presumably a leftover from a C version of
    // this file - TODO confirm whether it still matters)

    // Extended general purpose registers 8..15
    R8       = 8,
    R9       = 9,
    R10      = 10,
    R11      = 11,
    R12      = 12,
    R13      = 13,
    R14      = 14,
    R15      = 15,

    // XMM registers start at 16
    XMM0    = 16,
    XMM1    = 17,
    XMM2    = 18,
    XMM3    = 19,
    XMM4    = 20,
    XMM5    = 21,
    XMM6    = 22,
    XMM7    = 23,
/* There are also XMM8..XMM14 */
    // Only XMM15 of the upper XMM registers is named here. Note that the
    // numbers 24..29 are also assigned to ES, PSW, STACK, ST0, ST01 and NOREG.
    XMM15   = 31,
}
64 
/**
 * Test whether a register number lies in the XMM register range.
 * Params:
 *      reg = register number to test
 * Returns:
 *      true if XMM0 <= reg <= XMM15
 * Note:
 *      The range XMM0..XMM15 is 16..31, which also contains the numbers this
 *      file assigns to ES, PSW, STACK, ST0, ST01 and NOREG, so the result is
 *      true for those values too.
 */
bool isXMMreg(reg_t reg) pure
{
    if (reg < XMM0)
        return false;
    return reg <= XMM15;
}
66 
// Same register number as BX. Presumably the register set aside for position
// independent code: the *_INIT_PIC masks in this file leave out mBX.
enum PICREG = BX;

enum ES     = 24;       // register number used to build mES

enum NUMGENREGS = 16;   // AX..DI and R8..R15 are numbered 0..15

// fishy naming as it covers XMM7 but not XMM15
// currently only used as a replacement for mES in cgcod.c
enum NUMREGS = 25;

enum PSW     = 25;      // register number used to build mPSW
enum STACK   = 26;      // top of stack
enum ST0     = 27;      // 8087 top of stack register
enum ST01    = 28;      // top two 8087 registers; for complex types

enum reg_t NOREG   = 29;     // no register
83 
/* Byte register numbers.
 * They use the values 0..7, the same numbers as AX..DI.
 */
enum
{
    AL      = 0,
    CL      = 1,
    DL      = 2,
    BL      = 3,
    AH      = 4,
    CH      = 5,
    DH      = 6,
    BH      = 7,
}
95 
/* Register masks.
 * Every mask is a single bit located at the position given by the
 * register's number, so a set of registers is the OR of its masks.
 */
enum
{
    // General purpose registers, bits 0..7
    mAX     = (1 << AX),      // 0x01
    mCX     = (1 << CX),      // 0x02
    mDX     = (1 << DX),      // 0x04
    mBX     = (1 << BX),      // 0x08
    mSP     = (1 << SP),      // 0x10
    mBP     = (1 << BP),      // 0x20
    mSI     = (1 << SI),      // 0x40
    mDI     = (1 << DI),      // 0x80

    // Extended general purpose registers, bits 8..15
    mR8     = (1 << R8),
    mR9     = (1 << R9),
    mR10    = (1 << R10),
    mR11    = (1 << R11),
    mR12    = (1 << R12),
    mR13    = (1 << R13),
    mR14    = (1 << R14),
    mR15    = (1 << R15),

    // XMM0..XMM7, bits 16..23
    mXMM0   = (1 << XMM0),
    mXMM1   = (1 << XMM1),
    mXMM2   = (1 << XMM2),
    mXMM3   = (1 << XMM3),
    mXMM4   = (1 << XMM4),
    mXMM5   = (1 << XMM5),
    mXMM6   = (1 << XMM6),
    mXMM7   = (1 << XMM7),

    // All of the XMM masks defined above
    XMMREGS = (mXMM0 | mXMM1 | mXMM2 | mXMM3 | mXMM4 | mXMM5 | mXMM6 | mXMM7),

    mES     = (1 << ES),      // 0x1000000
    mPSW    = (1 << PSW),     // 0x2000000

    mSTACK  = (1 << STACK),   // 0x4000000

    mST0    = (1 << ST0),     // 0x8000000
    mST01   = (1 << ST01),    // 0x10000000
}
134 
// Flags for getlvalue (must fit in regm_t)
// They occupy bits 30 and 31; the highest register mask bit defined in this
// file is bit 28 (mST01).
enum RMload  = (1 << 30);
enum RMstore = (1 << 31);
138 
// Register mask variables defined in another module.
// Presumably initialised from the ALLREGS_INIT* / BYTEREGS_INIT* values in
// this file - TODO confirm at the definition.
extern (C++) extern __gshared regm_t ALLREGS;
extern (C++) extern __gshared regm_t BYTEREGS;
141 
// Default register sets; these are the same for every target.
enum ALLREGS_INIT          = (mAX|mBX|mCX|mDX|mSI|mDI);
enum BYTEREGS_INIT         = (mAX|mBX|mCX|mDX);

static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
{
    // To support position independent code it must be possible to remove BX
    // from the available registers, so these variants are the default sets
    // with mBX taken out.
    enum ALLREGS_INIT_PIC      = (ALLREGS_INIT & ~mBX);
    enum BYTEREGS_INIT_PIC     = (BYTEREGS_INIT & ~mBX);
}
156 
157 
/* We use the same IDXREGS for the 386 as the 8088, because if
   we used ALLREGS, it would interfere with mMSW
 */
enum IDXREGS         = (mBX|mSI|mDI);

// Register sets for float and double values.
// The _64/_32/_16 suffix presumably selects the code generation mode
// (64, 32 or 16 bit) - TODO confirm at the use sites.
enum FLOATREGS_64    = mAX;
enum FLOATREGS2_64   = mDX;
enum DOUBLEREGS_64   = mAX;
enum DOUBLEREGS2_64  = mDX;

enum FLOATREGS_32    = mAX;
enum FLOATREGS2_32   = mDX;
enum DOUBLEREGS_32   = (mAX|mDX);
enum DOUBLEREGS2_32  = (mCX|mBX);

enum FLOATREGS_16    = (mDX|mAX);
enum FLOATREGS2_16   = (mCX|mBX);
enum DOUBLEREGS_16   = (mAX|mBX|mCX|mDX);
177 /*#define _8087REGS (mST0|mST1|mST2|mST3|mST4|mST5|mST6|mST7)*/
178 
/* Segment registers    */
// Numbered 0..3 in the order ES, CS, SS, DS (presumably the order the
// hardware uses to encode segment registers - TODO confirm).
enum
{
    SEG_ES  = 0,
    SEG_CS  = 1,
    SEG_SS  = 2,
    SEG_DS  = 3,
}
187 
188 /*********************
189  * Masks for register pairs.
190  * Note that index registers are always LSWs. This is for the convenience
191  * of implementing far pointers.
192  */
193 
// The first branch is disabled; it is an alternative split in which DI
// counts as a most significant register.
static if (0)
{
// Give us an extra one so we can enregister a long
enum mMSW = mCX|mDX|mDI|mES;       // most significant regs
enum mLSW = mAX|mBX|mSI;           // least significant regs
}
else
{
// Definitions in effect: every register in IDXREGS (BX, SI, DI) is in mLSW.
enum mMSW = mCX|mDX|mES;           // most significant regs
enum mLSW = mAX|mBX|mSI|mDI;       // least significant regs
}
205 
/**
 * Decide from a mod/reg/rm byte whether a SIB byte is present.
 * Params:
 *      rm = the mod/reg/rm byte
 * Returns:
 *      1 if the r/m field (low three bits) is 4 and the mod field (top two
 *      bits) is not 3, otherwise 0
 */
uint issib(uint rm)
{
    const bool rmSelectsSib = (rm & 7) == 4;
    const bool modIsThree   = (rm & 0xC0) == 0xC0;
    return rmSelectsSib && !modIsThree;
}
208 
209 static if (0)
210 {
211 // relocation field size is always 32bits
212 //enum is32bitaddr(x,Iflags) (1)
213 }
214 else
215 {
216 //
217 // is32bitaddr works correctly only when x is 0 or 1.  This is
218 // true today for the current definition of I32, but if the definition
219 // of I32 changes, this macro will need to change as well
220 //
221 // Note: even for linux targets, CFaddrsize can be set by the inline
222 // assembler.
/**
 * Params:
 *      x = per the note above, the current value of I32 (presumably "the
 *          default address size is 32 bits" - TODO confirm)
 *      Iflags = flags of the instruction; only CFaddrsize is examined
 * Returns:
 *      true when I64 is set; otherwise true exactly when x differs from
 *      "CFaddrsize is set in Iflags"
 */
bool is32bitaddr(bool x,code_flags_t Iflags)
{
    if (I64)
        return true;
    const bool hasAddrSizePrefix = (Iflags & CFaddrsize) != 0;
    return x != hasAddrSizePrefix;
}
224 }
225 
226 
227 /**********************
228  * C library routines.
229  * See callclib().
230  */
231 
// Identifiers for the C library routines; the members have no explicit
// values, so they are numbered consecutively starting at 0.
enum CLIB
{
    lcmp,
    lmul,
    ldiv,
    lmod,
    uldiv,
    ulmod,

    dmul,ddiv,dtst0,dtst0exc,dcmp,dcmpexc,dneg,dadd,dsub,
    fmul,fdiv,ftst0,ftst0exc,fcmp,fcmpexc,fneg,fadd,fsub,

    dbllng,lngdbl,dblint,intdbl,
    dbluns,unsdbl,
    dblulng,
    ulngdbl,
    dblflt,fltdbl,
    dblllng,
    llngdbl,
    dblullng,
    ullngdbl,
    dtst,
    vptrfptr,cvptrfptr,

    _87topsw,fltto87,dblto87,dblint87,dbllng87,
    ftst,
    fcompp,
    ftest,
    ftest0,
    fdiv87,

    // Complex numbers
    cmul,
    cdiv,
    ccmp,

    u64_ldbl,
    ld_u64,
    MAX         // last member, so its value equals the number of routines above
}
272 
// Type of the flag word stored in code.Iflags; the CFxxx values below are
// its bits.
alias code_flags_t = uint;
enum
{
    CFes        =        1,     // generate an ES: segment override for this instr
    CFjmp16     =        2,     // need 16 bit jump offset (long branch)
    CFtarg      =        4,     // this code is the target of a jump
    CFseg       =        8,     // get segment of immediate value
    CFoff       =     0x10,     // get offset of immediate value
    CFss        =     0x20,     // generate an SS: segment override (not with
                                // CFes at the same time, though!)
    CFpsw       =     0x40,     // we need the flags result after this instruction
    CFopsize    =     0x80,     // prefix with operand size
    CFaddrsize  =    0x100,     // prefix with address size
    CFds        =    0x200,     // need DS override (not with ES, SS, or CS )
    CFcs        =    0x400,     // need CS override
    CFfs        =    0x800,     // need FS override
    CFgs        =   CFcs | CFfs,   // need GS override (both the CS and FS bits set)
    CFwait      =   0x1000,     // If I32 it indicates when to output a WAIT
    CFselfrel   =   0x2000,     // if self-relative
    CFunambig   =   0x4000,     // indicates cannot be accessed by other addressing
                                // modes
    CFtarg2     =   0x8000,     // like CFtarg, but we can't optimize this away
    CFvolatile  =  0x10000,     // volatile reference, do not schedule
    CFclassinit =  0x20000,     // class init code
    CFoffset64  =  0x40000,     // offset is 64 bits
    CFpc32      =  0x80000,     // I64: PC relative 32 bit fixup

    CFvex       =  0x100000,    // vex prefix
    CFvex3      =  0x200000,    // 3 byte vex prefix

    CFjmp5      =  0x400000,    // always a 5 byte jmp
    CFswitch    =  0x800000,    // kludge for switch table fixups

    CFindirect  = 0x1000000,    // OSX32: indirect fixups

    /* These are for CFpc32 fixups, they're the negative of the offset of the fixup
     * from the program counter
     */
    // NOTE(review): this three bit field (bits 24..26) includes the CFindirect
    // bit; presumably the two are never needed on the same target - confirm.
    CFREL       = 0x7000000,

    CFSEG       = CFes | CFss | CFds | CFcs | CFfs | CFgs,  // every segment override flag
    CFPREFIX    = CFSEG | CFopsize | CFaddrsize,            // every flag described above as an override or prefix
}
316 
/**
 * One entry in the linked list of generated instructions.
 */
struct code
{
    code *next;                 // following entry in the list
    code_flags_t Iflags;        // CFxxx flags

    /* The opcode. Instructions using a VEX prefix view the same storage
     * through Ivex.
     */
    union
    {
        opcode_t Iop;
        struct Svex
        {
          nothrow:
          align(1):
            ubyte  op;

            /* Packed VEX fields, most significant bit first:
             *   high byte [R X B m-mmmm], low byte [W vvvv L pp]
             */
            ushort _pp;

            ubyte pfx; // always 0xC4

            // Read the field of _pp that is `mask` wide and starts at bit `shift`.
            private ushort field(uint shift, uint mask) const
            {
                return cast(ushort)((_pp >> shift) & mask);
            }

            // Replace the field of _pp that is `mask` wide and starts at bit
            // `shift` with the low bits of v; all other bits are kept.
            private void setField(uint shift, uint mask, uint v)
            {
                const uint kept = _pp & ~(mask << shift);
                _pp = cast(ushort)(kept | ((v & mask) << shift));
            }

            // pp: bits 0..1
            @property ushort pp() const   { return field(0, 3); }
            @property void pp(ushort v)   { setField(0, 3, v); }

            // L: bit 2
            @property ushort l() const    { return field(2, 1); }
            @property void l(ushort v)    { setField(2, 1, v); }

            // vvvv: bits 3..6
            @property ushort vvvv() const { return field(3, 0x0F); }
            @property void vvvv(ushort v) { setField(3, 0x0F, v); }

            // W: bit 7
            @property ushort w() const    { return field(7, 1); }
            @property void w(ushort v)    { setField(7, 1, v); }

            // m-mmmm: bits 8..12
            @property ushort mmmm() const { return field(8, 0x1F); }
            @property void mmmm(ushort v) { setField(8, 0x1F, v); }

            // B: bit 13
            @property ushort b() const    { return field(13, 1); }
            @property void b(ushort v)    { setField(13, 1, v); }

            // X: bit 14
            @property ushort x() const    { return field(14, 1); }
            @property void x(ushort v)    { setField(14, 1, v); }

            // R: bit 15 (only the lowest bit of v can land inside _pp)
            @property ushort r() const    { return field(15, 1); }
            @property void r(ushort v)    { setField(15, 1, v); }
        }
        Svex Ivex;
    }

    /* The "effective address" of the instruction: the modregrm byte, the sib
     * byte and the REX prefix byte, also reachable together as Iea.
     * The 16 bit code generator used just the modregrm, 32 bit x86 added the
     * sib, and 64 bit added the rex.
     */
    union
    {
        uint Iea;
        struct
        {
            ubyte Irm;          // reg/mode
            ubyte Isib;         // SIB byte
            ubyte Irex;         // REX prefix
        }
    }

    /* IFL1/IEV1 describe the first operand, which usually ends up being the
     * offset of the effective address; IFL1 is the tag telling which variant
     * of IEV1 is in use. IFL2/IEV2 describe the second operand, usually the
     * immediate value.
     */

    ubyte IFL1,IFL2;    // FLavors of 1st, 2nd operands
    evc IEV1;             // 1st operand, if any
    evc IEV2;             // 2nd operand, if any

  nothrow:
    /// OR register number `reg` into the reg field of Irm, setting REX_R in
    /// Irex when bit 3 of reg is set. Bits already set are left alone.
    void orReg(uint reg)
    {
        const bool needsRexR = (reg & 8) != 0;
        if (needsRexR)
            Irex |= REX_R;
        Irm |= modregrm(0, reg & 7, 0);
    }

    /// Clear REX_R and the reg field of Irm, then store `reg` there via orReg().
    void setReg(uint reg)
    {
        const uint regField = modregrm(0, 7, 0);
        Irex &= ~REX_R;
        Irm &= cast(ubyte)~regField;
        orReg(reg);
    }

    /// Returns: true if the opcode is JMP or JMPS
    bool isJumpOP()
    {
        switch (Iop)
        {
            case JMP, JMPS:
                return true;

            default:
                return false;
        }
    }

    /// Pretty-print this instruction by passing it to code_print().
    extern (C++) void print()
    {
        code_print(&this);
    }
}
408 
// Printer for a single code entry. Declared with C linkage and defined
// outside this file; code.print() forwards to it.
extern (C) void code_print(code*);
410 
411 /*******************
412  * Some instructions.
413  */
414 
// Opcode values for the instructions and prefixes referred to by name.
enum
{
    // Segment override prefixes
    SEGES   = 0x26,
    SEGCS   = 0x2E,
    SEGSS   = 0x36,
    SEGDS   = 0x3E,
    SEGFS   = 0x64,
    SEGGS   = 0x65,

    CMP     = 0x3B,
    CALL    = 0xE8,
    JMP     = 0xE9,    // Intra-Segment Direct
    JMPS    = 0xEB,    // JMP SHORT
    JCXZ    = 0xE3,
    LOOP    = 0xE2,
    LES     = 0xC4,
    LEA     = 0x8D,
    LOCK    = 0xF0,
    INT3    = 0xCC,
    HLT     = 0xF4,

    STO     = 0x89,
    LOD     = 0x8B,

    // Conditional jumps. JC/JB and JNC/JAE are two names for the same value.
    JO      = 0x70,
    JNO     = 0x71,
    JC      = 0x72,
    JB      = 0x72,
    JNC     = 0x73,
    JAE     = 0x73,
    JE      = 0x74,
    JNE     = 0x75,
    JBE     = 0x76,
    JA      = 0x77,
    JS      = 0x78,
    JNS     = 0x79,
    JP      = 0x7A,
    JNP     = 0x7B,
    JL      = 0x7C,
    JGE     = 0x7D,
    JLE     = 0x7E,
    JG      = 0x7F,

    UD2     = 0x0F0B,

    // The three pseudo-opcodes below reuse segment prefix values.

    // NOP is used as a placeholder in the linked list of instructions, no
    // actual code will be generated for it.
    NOP     = SEGCS,   // don't use 0x90 because the
                       // Windows stuff wants to output 0x90's

    ASM     = SEGSS,   // string of asm bytes

    ESCAPE  = SEGDS,   // marker that special information is here
                       // (Iop2 is the type of special information)
                       // NOTE(review): struct code has no Iop2 member; the
                       // ESCxxx values are multiples of 0x100, so the type
                       // presumably sits in Iop above the ESCAPEmask bits
}
470 
471 
enum ESCAPEmask = 0xFF; // code.Iop & ESCAPEmask ==> actual Iop

// Kinds of special information. Each value is a small number shifted left by
// 8, so none of them overlaps the bits selected by ESCAPEmask.
enum
{
    ESClinnum   = (1 << 8),      // line number information
    ESCctor     = (2 << 8),      // object is constructed
    ESCdtor     = (3 << 8),      // object is destructed
    ESCmark     = (4 << 8),      // mark eh stack
    ESCrelease  = (5 << 8),      // release eh stack
    ESCoffset   = (6 << 8),      // set code offset for eh
    ESCadjesp   = (7 << 8),      // adjust ESP by IEV2.Vint
    ESCmark2    = (8 << 8),      // mark eh stack
    ESCrelease2 = (9 << 8),      // release eh stack
    ESCframeptr = (10 << 8),     // replace with load of frame pointer
    ESCdctor    = (11 << 8),     // D object is constructed
    ESCddtor    = (12 << 8),     // D object is destructed
    ESCadjfpu   = (13 << 8),     // adjust fpustackused by IEV2.Vint
    ESCfixesp   = (14 << 8),     // reset ESP to end of local frame
}
491 
492 /*********************************
493  * Macros to ease generating code
494  * modregrm:    generate mod reg r/m field
495  * modregxrm:   reg could be R8..R15
496  * modregrmx:   rm could be R8..R15
497  * modregxrmx:  reg or rm could be R8..R15
498  * NEWREG:      change reg field of x to r
499  * genorreg:    OR  t,f
500  */
501 
/**
 * Pack a mod/reg/rm byte: mod in bits 6..7, reg in bits 3..5, rm in bits 0..2.
 * The arguments are not masked; the result is truncated to 8 bits.
 */
ubyte modregrm (uint m, uint r, uint rm)
{
    const uint modBits = m << 6;
    const uint regBits = r << 3;
    return cast(ubyte)(modBits | regBits | rm);
}

/**
 * Like modregrm(), but r may be 0..15: bit 3 of r is moved to bit 18 of the
 * result and only the low three bits go into the reg field. Bit 18 is where
 * REX_R would sit if the third byte of the result holds the REX bits
 * (presumably matching Irex inside code.Iea - confirm).
 */
uint modregxrm (uint m, uint r, uint rm)
{
    const uint rexR = (r & 8) << 15;
    return rexR | modregrm(m, r & 7, rm);
}

/**
 * Like modregrm(), but rm may be 0..15: bit 3 of rm is moved to bit 16 of the
 * result (the REX_B position under the same assumption) and only the low
 * three bits go into the rm field.
 */
uint modregrmx (uint m, uint r, uint rm)
{
    const uint rexB = (rm & 8) << 13;
    return rexB | modregrm(m, r, rm & 7);
}

/**
 * Like modregrm(), but both r and rm may be 0..15; combines the bit
 * placements of modregxrm() and modregrmx().
 */
uint modregxrmx(uint m, uint r, uint rm)
{
    const uint rexR = (r & 8) << 15;
    const uint rexB = (rm & 8) << 13;
    return rexR | rexB | modregrm(m, r & 7, rm & 7);
}
506 
/// Replace the REX_R bit of x with bit 3 of r; the other bits of x are kept.
void NEWREXR(ref ubyte x, uint r)
{
    const uint rexR = (r & 8) >> 1;
    x = cast(ubyte)((x & ~REX_R) | rexR);
}
/// Change the reg field (bits 3..5) of x to r. r is not masked, so values
/// above 7 also set bits above the field.
void NEWREG (ref ubyte x, uint r)
{
    const uint cleared = x & ~(7 << 3);
    x = cast(ubyte)(cleared | (r << 3));
}
/// Set the register of instruction c to r (0..15): the low three bits go into
/// the reg field of c.Irm and bit 3 into the REX_R bit of c.Irex.
void code_newreg(code* c, uint r)
{
    const uint lowBits = r & 7;
    NEWREG(c.Irm, lowBits);
    NEWREXR(c.Irex, r);
}
510 
511 //#define genorreg(c,t,f)         genregs((c),0x09,(f),(t))
512 
// REX prefix: the base byte value and the four flag bits OR'd into it.
enum
{
    REX     = 0x40,        // REX prefix byte, OR'd with the following bits:
    REX_W   = 1 << 3,      // 0 = default operand size, 1 = 64 bit operand size
    REX_R   = 1 << 2,      // high bit of reg field of modregrm
    REX_X   = 1 << 1,      // high bit of sib index reg
    REX_B   = 1 << 0,      // high bit of rm field, sib base reg, or opcode reg
}
521 
/**
 * Assemble a byte from the VEX fields in the order [R vvvv L pp]
 * (R in bit 7, vvvv in bits 3..6, L in bit 2, pp in bits 0..1).
 * Going by the name, this is byte 1 of a 2 byte VEX prefix.
 */
uint VEX2_B1(code.Svex ivex)
{
    uint result = ivex.pp;
    result |= ivex.l    << 2;
    result |= ivex.vvvv << 3;
    result |= ivex.r    << 7;
    return result;
}
530 
/**
 * Assemble a byte from the VEX fields in the order [R X B m-mmmm]
 * (R in bit 7, X in bit 6, B in bit 5, m-mmmm in bits 0..4).
 * Going by the name, this is byte 1 of a 3 byte VEX prefix.
 */
uint VEX3_B1(code.Svex ivex)
{
    uint result = ivex.mmmm;
    result |= ivex.b << 5;
    result |= ivex.x << 6;
    result |= ivex.r << 7;
    return result;
}
539 
/**
 * Assemble a byte from the VEX fields in the order [W vvvv L pp]
 * (W in bit 7, vvvv in bits 3..6, L in bit 2, pp in bits 0..1).
 * Going by the name, this is byte 2 of a 3 byte VEX prefix.
 */
uint VEX3_B2(code.Svex ivex)
{
    uint result = ivex.pp;
    result |= ivex.l    << 2;
    result |= ivex.vvvv << 3;
    result |= ivex.w    << 7;
    return result;
}
548 
/// Returns: true when the configured target CPU is TARGET_80286 or lower
/// (going by the name, the case in which FWAIT instructions have to be added).
bool ADDFWAIT()
{
    const bool oldTarget = config.target_cpu <= TARGET_80286;
    return oldTarget;
}
550 
551 /************************************
552  */
553 
554 extern (C++):
555 
/// Describes one value held in an 8087 register or spill slot
/// (it is the element type of Globals87.stack and Globals87.save).
struct NDP
{
    elem *e;                    // which elem is stored here (NULL if none)
    uint offset;            // offset from e (used for complex numbers)
}
561 
/// Bookkeeping for the 8087 register stack.
struct Globals87
{
    NDP[8] stack;              // 8087 stack
    int stackused = 0;         // number of items on the 8087 stack

    Barray!NDP save;           // 8087 values spilled to memory
}

// The instance of the 8087 state; defined in another module.
extern (C++) extern __gshared Globals87 global87;
571 
// Prototypes of routines implemented outside this file (C++ linkage, because
// of the extern (C++): above).
// NOTE(review): going by the names, the _msw/_lsw variants adjust a code to
// address the most/least significant word - confirm at the definitions.
void getlvalue_msw(code *);
void getlvalue_lsw(code *);
// RMload and RMstore are described in this file as "flags for getlvalue";
// presumably they are passed in keepmsk - confirm.
void getlvalue(ref CodeBuilder cdb, code *pcs, elem *e, regm_t keepmsk);
void loadea(ref CodeBuilder cdb, elem *e, code *cs, uint op, uint reg, targ_size_t offset, regm_t keepmsk, regm_t desmsk);
576