1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1994-1998 by Symantec
6  *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod3.d, backend/cod3.d)
10  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod3.d
11  */
12 
13 module dmd.backend.cod3;
14 
// Define the COMPILE version identifier when building for either SCPP or MARS.
// The remainder of this module is wrapped in `version (COMPILE)`.
version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;
19 
20 version (COMPILE)
21 {
22 
23 import core.stdc.stdio;
24 import core.stdc.stdlib;
25 import core.stdc.string;
26 
27 import dmd.backend.backend;
28 import dmd.backend.cc;
29 import dmd.backend.cdef;
30 import dmd.backend.cgcse;
31 import dmd.backend.code;
32 import dmd.backend.code_x86;
33 import dmd.backend.codebuilder;
34 import dmd.backend.dlist;
35 import dmd.backend.dvec;
36 import dmd.backend.melf;
37 import dmd.backend.mem;
38 import dmd.backend.el;
39 import dmd.backend.exh;
40 import dmd.backend.global;
41 import dmd.backend.obj;
42 import dmd.backend.oper;
43 import dmd.backend.outbuf;
44 import dmd.backend.rtlsym;
45 import dmd.backend.ty;
46 import dmd.backend.type;
47 import dmd.backend.xmm;
48 
49 version (SCPP)
50 {
51     import parser;
52     import precomp;
53 }
54 
55 extern (C++):
56 
57 nothrow:
58 
// Expose the MARS version identifier as a compile time boolean so it can be
// tested inside ordinary expressions.
version (MARS)
{
    enum bool MARS = true;
}
else
{
    enum bool MARS = false;
}
63 
// Declared here, defined elsewhere.
// NOTE(review): used below as the byte size of one general purpose register
// save slot - presumably the target's register size; confirm at the definition.
int REGSIZE();

// Globals defined elsewhere; segfl, stackfl and flinsymtab are tables with
// FLMAX entries each.
extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl, flinsymtab;
69 
/// Returns: a value with only bit number `m` set
private extern (D) uint mask(uint m)
{
    const uint bit = 1 << m;
    return bit;
}
71 
72 //private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); }
73 
extern __gshared targ_size_t retsize;   // defined elsewhere

enum JMPJMPTABLE = false;               // benchmarking shows it's slower

// Bit patterns of the most negative and most positive signed 64 bit integers
enum MINLL =           0x8000_0000_0000_0000L;
enum MAXLL =           0x7FFF_FFFF_FFFF_FFFFL;
80 
/*************
 * Size in bytes of each instruction.
 * 0 means illegal instruction.
 * Each entry of the inssize and inssize2 tables is a size or'ed with
 * any of the following flag bits:
 * bit  M:      if there is a modregrm field (EV1 is reserved for modregrm)
 * bit  T:      if there is a second operand (EV2)
 * bit  E:      if second operand is only 8 bits
 * bit  A:      a short version exists for the AX reg
 * bit  R:      a short version exists for regs
 * bits 2..0:   size of instruction (excluding optional bytes)
 * W is 0, so or'ing it into an entry does not change the entry.
 */

enum
{
    M = 0x80,   // modregrm field present
    T = 0x40,   // second operand present
    E = 0x20,   // second operand is 8 bits
    A = 0x10,   // short version for AX exists
    R = 0x08,   // short version for regs exists
    W = 0,      // sets no bits; NOTE(review): looks like a readability marker
                // in the 0F 80..8F entries of inssize2 - confirm intent
}
101 
/* Size and flag table for one byte opcodes, indexed by the opcode.
 * Each entry is a size or'ed with the M, T, E, A, R flag bits.
 * The table is mutable because cod3_set32() and cod3_set64() overwrite
 * entries 0xA0..0xA3.
 */
private __gshared ubyte[256] inssize =
[       M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 00 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 08 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 10 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 18 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 20 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 28 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 30 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 38 */
        1,1,1,1,                1,1,1,1,                /* 40 */
        1,1,1,1,                1,1,1,1,                /* 48 */
        1,1,1,1,                1,1,1,1,                /* 50 */
        1,1,1,1,                1,1,1,1,                /* 58 */
        1,1,M|2,M|2,            1,1,1,1,                /* 60 */
        T|3,M|T|4,T|E|2,M|T|E|3, 1,1,1,1,               /* 68 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* 70 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* 78 */
        M|T|E|A|3,M|T|A|4,M|T|E|3,M|T|E|3,      M|2,M|2,M|2,M|A|R|2, /* 80 */
        M|A|2,M|A|2,M|A|2,M|A|2,        M|2,M|2,M|2,M|R|2,      /* 88 */
        1,1,1,1,                1,1,1,1,                /* 90 */
        1,1,T|5,1,              1,1,1,1,                /* 98 */

     // cod3_set32() and cod3_set64() patch the first four entries of this row to T|5
    //  T|5,T|5,T|5,T|5,        1,1,1,1,                /* A0 */
        T|3,T|3,T|3,T|3,        1,1,1,1,                /* A0 */

        T|E|2,T|3,1,1,          1,1,1,1,                /* A8 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* B0 */
        T|3,T|3,T|3,T|3,        T|3,T|3,T|3,T|3,                /* B8 */
        M|T|E|3,M|T|E|3,T|3,1,  M|2,M|2,M|T|E|R|3,M|T|R|4,      /* C0 */
        T|E|4,1,T|3,1,          1,T|E|2,1,1,            /* C8 */
        M|2,M|2,M|2,M|2,        T|E|2,T|E|2,0,1,        /* D0 */
        /* For the floating instructions, allow room for the FWAIT      */
        M|2,M|2,M|2,M|2,        M|2,M|2,M|2,M|2,        /* D8 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* E0 */
        T|3,T|3,T|5,T|E|2,              1,1,1,1,                /* E8 */
        1,0,1,1,                1,1,M|A|2,M|A|2,                /* F0 */
        1,1,1,1,                1,1,M|2,M|R|2                   /* F8 */
];
141 
/* Plain instruction sizes (no flag bits) for one byte opcodes, indexed by
 * the opcode. 0 means illegal instruction.
 * NOTE(review): judging by the name and the larger immediate/displacement
 * sizes this is the 32 bit counterpart of inssize - confirm against its users.
 */
private __gshared const ubyte[256] inssize32 =
[       2,2,2,2,        2,5,1,1,                /* 00 */
        2,2,2,2,        2,5,1,1,                /* 08 */
        2,2,2,2,        2,5,1,1,                /* 10 */
        2,2,2,2,        2,5,1,1,                /* 18 */
        2,2,2,2,        2,5,1,1,                /* 20 */
        2,2,2,2,        2,5,1,1,                /* 28 */
        2,2,2,2,        2,5,1,1,                /* 30 */
        2,2,2,2,        2,5,1,1,                /* 38 */
        1,1,1,1,        1,1,1,1,                /* 40 */
        1,1,1,1,        1,1,1,1,                /* 48 */
        1,1,1,1,        1,1,1,1,                /* 50 */
        1,1,1,1,        1,1,1,1,                /* 58 */
        1,1,2,2,        1,1,1,1,                /* 60 */
        5,6,2,3,        1,1,1,1,                /* 68 */
        2,2,2,2,        2,2,2,2,                /* 70 */
        2,2,2,2,        2,2,2,2,                /* 78 */
        3,6,3,3,        2,2,2,2,                /* 80 */
        2,2,2,2,        2,2,2,2,                /* 88 */
        1,1,1,1,        1,1,1,1,                /* 90 */
        1,1,7,1,        1,1,1,1,                /* 98 */
        5,5,5,5,        1,1,1,1,                /* A0 */
        2,5,1,1,        1,1,1,1,                /* A8 */
        2,2,2,2,        2,2,2,2,                /* B0 */
        5,5,5,5,        5,5,5,5,                /* B8 */
        3,3,3,1,        2,2,3,6,                /* C0 */
        4,1,3,1,        1,2,1,1,                /* C8 */
        2,2,2,2,        2,2,0,1,                /* D0 */
        /* For the floating instructions, don't need room for the FWAIT */
        2,2,2,2,        2,2,2,2,                /* D8 */

        2,2,2,2,        2,2,2,2,                /* E0 */
        5,5,7,2,        1,1,1,1,                /* E8 */
        1,0,1,1,        1,1,2,2,                /* F0 */
        1,1,1,1,        1,1,2,2                 /* F8 */
];
178 
/* Size and flag table for 2 byte opcodes starting with 0x0F, indexed by the
 * second opcode byte. Entries use the same encoding as inssize.
 * The table is mutable because cod3_set32() and cod3_set64() overwrite
 * entries 0x80..0x8F with W|T|6.
 */
private __gshared ubyte[256] inssize2 =
[       M|3,M|3,M|3,M|3,        2,2,2,2,                // 00
        2,2,M|3,2,              2,M|3,2,M|T|E|4,        // 08
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 10
        M|3,2,2,2,              2,2,2,2,                // 18
        M|3,M|3,M|3,M|3,        M|3,2,M|3,2,            // 20
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 28
        2,2,2,2,                2,2,2,2,                // 30
        M|4,2,M|T|E|5,2,        2,2,2,2,                // 38
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 40
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 48
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 50
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 58
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 60
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 68
        M|T|E|4,M|T|E|4,M|T|E|4,M|T|E|4, M|3,M|3,M|3,2, // 70
        2,2,2,2,                M|3,M|3,M|3,M|3,        // 78
        W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 80
        W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 88
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // 90
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // 98
        2,2,2,M|3,      M|T|E|4,M|3,2,2,        // A0
        2,2,2,M|3,      M|T|E|4,M|3,M|3,M|3,    // A8
        M|E|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,     // B0
        M|3,2,M|T|E|4,M|3, M|3,M|3,M|3,M|3,     // B8
        M|3,M|3,M|T|E|4,M|3, M|T|E|4,M|T|E|4,M|T|E|4,M|3,       // C0
        2,2,2,2,        2,2,2,2,                // C8
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // D0
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // D8
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // E0
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // E8
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // F0
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,2          // F8
];
214 
/*************************************************
 * Emit code that stores `reg` into the next free slot of the
 * `regsave` stack area.
 * XMM registers get a 16 byte slot aligned to 16 within the area,
 * all other registers get a REGSIZE byte slot.
 * Params:
 *      regsave = register save area on stack; its `idx`, `top` and
 *                `alignment` fields are updated
 *      cdb = where to write generated code
 *      reg = register to save
 *      idx = set to the slot's location in regsave, for use in REGSAVE_restore()
 */

void REGSAVE_save(ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, out uint idx)
{
    if (!isXMMreg(reg))
    {
        // First slot handed out decides the alignment unless an XMM save raises it
        if (!regsave.alignment)
            regsave.alignment = REGSIZE;

        idx = regsave.idx;
        regsave.idx += REGSIZE;

        // MOV idx[RBP],reg
        cdb.genc1(0x89, modregxrm(2, reg, BPRM), FLregsave, cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
    else
    {
        regsave.alignment = 16;

        // Round the next free offset up to a multiple of 16
        regsave.idx = (regsave.idx + 15) & ~15;
        idx = regsave.idx;
        regsave.idx += 16;

        // Haven't yet figured out why stack is not aligned to 16 on 32 bit
        // Linux, so use the unaligned store there
        const opcode_t storeOp = (TARGET_LINUX && I32) ? STOUPD : STOAPD;

        // storeOp idx[RBP],xmm
        cdb.genc1(storeOp, modregxrm(2, reg - XMM0, BPRM), FLregsave, cast(targ_uns) idx);
    }

    reflocal = true;

    // keep high water mark
    if (regsave.top < regsave.idx)
        regsave.top = regsave.idx;
}
254 
/*******************************
 * Emit code that reloads `reg` from slot `idx` of the `regsave` area.
 * Complement of REGSAVE_save().
 * Params:
 *      regsave = register save area on stack
 *      cdb = where to write generated code
 *      reg = register to restore
 *      idx = slot location that REGSAVE_save() returned for this register
 */

void REGSAVE_restore(const ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, uint idx)
{
    if (!isXMMreg(reg))
    {
        // MOV reg,idx[RBP]
        cdb.genc1(0x8B, modregxrm(2, reg, BPRM), FLregsave, cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
        return;
    }

    // Saving an XMM register always sets the area's alignment to 16
    assert(regsave.alignment == 16);

    // Haven't yet figured out why stack is not aligned to 16 on 32 bit
    // Linux, so use the unaligned load there
    const opcode_t loadOp = (TARGET_LINUX && I32) ? LODUPD : LODAPD;

    // loadOp xmm,idx[RBP]
    cdb.genc1(loadOp, modregxrm(2, reg - XMM0, BPRM), FLregsave, cast(targ_uns) idx);
}
279 
/************************************
 * Look up the size entry for a VEX encoded instruction.
 * The result is the matching inssize2 entry (flag bits included) plus the
 * extra bytes needed for the VEX prefix form in use.
 * Params:
 *      c = instruction; must have CFvex set and a 0xC4 prefix byte
 * Returns:
 *      adjusted inssize2 entry
 */

ubyte vex_inssize(code *c)
{
    assert(c.Iflags & CFvex && c.Ivex.pfx == 0xC4);

    // Two byte VEX prefix
    if (!(c.Iflags & CFvex3))
        return cast(ubyte)(inssize2[c.Ivex.op] + 1);

    // Three byte VEX prefix: mmmm selects the implied leading opcode bytes
    switch (c.Ivex.mmmm)
    {
        case 0: // no prefix
        case 1: // 0F
            return cast(ubyte)(inssize2[c.Ivex.op] + 2);

        case 2: // 0F 38
            return cast(ubyte)(inssize2[0x38] + 1);

        case 3: // 0F 3A
            return cast(ubyte)(inssize2[0x3A] + 1);

        default:
            printf("Iop = %x mmmm = %x\n", c.Iop, c.Ivex.mmmm);
            assert(0);
    }
}
313 
/************************************
 * Determine if there is a modregrm byte for code.
 * Params:
 *      c = instruction to examine
 * Returns:
 *      the M bit of the instruction's size table entry, i.e. non-zero
 *      if a modregrm byte is present; always 0 for ESCAPE pseudo ops
 */

int cod3_EA(code *c)
{
    const opcode_t opcode = c.Iop;
    const opcode_t lowByte = opcode & 0xFF;

    if (lowByte == ESCAPE)
        return 0;

    // The 0xFFFD00 mask ignores the bit that distinguishes 0F 38 from 0F 3A,
    // so both three byte forms index inssize2 by their middle byte.
    const ubyte entry =
        (opcode & 0xFFFD00) == 0x0F3800 ? inssize2[(opcode >> 8) & 0xFF] :
        (opcode & 0xFF00)   == 0x0F00   ? inssize2[lowByte] :
                                          inssize[lowByte];
    return entry & M;
}
332 
/********************************
 * Initialize the ALLREGS and BYTEREGS register masks for the target.
 * For 64 bit code both are the same set of 14 general registers;
 * otherwise the *_INIT defaults are used.
 * called by: codgen
 */

void cod3_initregs()
{
    if (!I64)
    {
        ALLREGS = ALLREGS_INIT;
        BYTEREGS = BYTEREGS_INIT;
        return;
    }

    ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
    BYTEREGS = ALLREGS;
}
351 
/********************************
 * Set initial global variable values:
 * fregsaved becomes BP, SI and DI.
 */

void cod3_setdefault()
{
    fregsaved = mDI | mSI | mBP;
}
360 
/********************************
 * Fix global variables for 386:
 * patches the instruction size tables, selects the 32 bit register
 * sets and sets the stack alignment.
 */

void cod3_set32()
{
    // Opcodes A0..A3 take a 4 byte operand
    foreach (ref size; inssize[0xA0 .. 0xA4])
        size = T|5;

    // 0F 80 .. 0F 8F take a 4 byte operand
    foreach (ref size; inssize2[0x80 .. 0x90])
        size = W|T|6;

    BPRM = 5;                       /* [EBP] addressing mode        */

    // Registers saved across function calls
    regm_t saved = mBP | mBX | mSI | mDI;
    if (config.flags3 & CFG3eseqds)
        saved |= mES;
    fregsaved = saved;

    FLOATREGS = FLOATREGS_32;
    FLOATREGS2 = FLOATREGS2_32;
    DOUBLEREGS = DOUBLEREGS_32;

    if (config.fpxmmregs)
        TARGET_STACKALIGN = 16;
    else
        TARGET_STACKALIGN = 4;
}
384 
/********************************
 * Fix global variables for I64:
 * patches the instruction size tables, selects the 64 bit register
 * sets and sets the stack alignment.
 */

void cod3_set64()
{
    // MOV AL,mem / MOV RAX,mem / MOV mem,AL / MOV mem,RAX
    foreach (ref size; inssize[0xA0 .. 0xA4])
        size = T|5;

    // 0F 80 .. 0F 8F take a 4 byte operand
    foreach (ref size; inssize2[0x80 .. 0x90])
        size = W|T|6;

    BPRM = 5;                           // [RBP] addressing mode

    // Registers saved across function calls
static if (TARGET_WINDOS)
{
    fregsaved = mBP | mBX | mDI | mSI | mR12 | mR13 | mR14 | mR15 | mES | mXMM6 | mXMM7; // also XMM8..15;
}
else
{
    fregsaved = mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES;
}

    FLOATREGS = FLOATREGS_64;
    FLOATREGS2 = FLOATREGS2_64;
    DOUBLEREGS = DOUBLEREGS_64;

    ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI|  mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
    BYTEREGS = ALLREGS;

    if (config.fpxmmregs)
        TARGET_STACKALIGN = 16;
    else
        TARGET_STACKALIGN = 8;
}
417 
/*********************************
 * Write `nbytes` of NOP padding to a segment.
 * For 64 bit code, or when config.fpxmmregs is set, multi-byte NOP
 * instructions of up to 9 bytes are used; otherwise (and for a single
 * remaining byte) one byte NOPs are written.
 * Params:
 *      seg = segment to write alignment bytes to
 *      nbytes = number of alignment bytes to write
 */
void cod3_align_bytes(int seg, size_t nbytes)
{
    /* Table 4-2 from Intel Instruction Set Reference M-Z
     * 1 bytes NOP                                        90
     * 2 bytes 66 NOP                                     66 90
     * 3 bytes NOP DWORD ptr [EAX]                        0F 1F 00
     * 4 bytes NOP DWORD ptr [EAX + 00H]                  0F 1F 40 00
     * 5 bytes NOP DWORD ptr [EAX + EAX*1 + 00H]          0F 1F 44 00 00
     * 6 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00H]       66 0F 1F 44 00 00
     * 7 bytes NOP DWORD ptr [EAX + 00000000H]            0F 1F 80 00 00 00 00
     * 8 bytes NOP DWORD ptr [EAX + EAX*1 + 00000000H]    0F 1F 84 00 00 00 00 00
     * 9 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] 66 0F 1F 84 00 00 00 00 00
     * only for CPUs: CPUID.01H.EAX[bits 11:8] = 0110B or 1111B
     */

    assert(SegData[seg].SDseg == seg);

    while (nbytes)
    {
        size_t n;               // bytes written by this iteration
        const(char)* p;         // the bytes to write

        if (nbytes <= 1 || !(I64 || config.fpxmmregs))
        {
            // Run of one byte NOPs (XCHG AX,AX), at most nops.length per write
            static immutable ubyte[15] nops = [
                0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
            ];
            n = nbytes > nops.length ? nops.length : nbytes;
            p = cast(char*) nops.ptr;
        }
        else
        {
            // One multi-byte NOP covering as much as possible, 9 bytes at most
            n = nbytes;
            switch (nbytes)
            {
                case 2:  p = "\x66\x90"; break;
                case 3:  p = "\x0F\x1F\x00"; break;
                case 4:  p = "\x0F\x1F\x40\x00"; break;
                case 5:  p = "\x0F\x1F\x44\x00\x00"; break;
                case 6:  p = "\x66\x0F\x1F\x44\x00\x00"; break;
                case 7:  p = "\x0F\x1F\x80\x00\x00\x00\x00"; break;
                case 8:  p = "\x0F\x1F\x84\x00\x00\x00\x00\x00"; break;
                default: p = "\x66\x0F\x1F\x84\x00\x00\x00\x00\x00"; n = 9; break;
            }
        }

        objmod.write_bytes(SegData[seg],cast(uint)n,cast(char*)p);
        nbytes -= n;
    }
}
472 
/****************************
 * Align start of function by padding the segment with NOPs.
 * For TARGET_WINDOS, padding to a 16 byte boundary is done only when
 * optimizing for speed, targeting a 486 or a PentiumPro or later, and
 * fewer than 8 bytes are needed. Otherwise the segment is always padded
 * to an 8 byte boundary.
 * Params:
 *      seg = segment of function
 */
void cod3_align(int seg)
{
static if (TARGET_WINDOS)
{
    if (!(config.flags4 & CFG4speed))   // not optimized for speed
        return;

    // Pick alignment based on CPU target
    if (config.target_cpu != TARGET_80486 &&
        config.target_cpu < TARGET_PentiumPro)
        return;

    // 486 does reads on 16 byte boundaries, so if we are near
    // such a boundary, align us to it
    const uint pad = -Offset(seg) & 15;
    if (pad < 8)
        cod3_align_bytes(seg, pad);
}
else
{
    const uint pad = -Offset(seg) & 7;
    cod3_align_bytes(seg, pad);
}
}
503 
504 
/**********************************
 * Generate code to adjust the stack pointer by `nbytes`.
 * A positive value emits SUB ESP,nbytes; zero or a negative value
 * emits ADD ESP,-nbytes.
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to adjust stack pointer
 */
void cod3_stackadj(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackadj(%d)\n", nbytes);
    const bool subtract = nbytes > 0;

    const uint rm = subtract
        ? modregrm(3,5,SP)      // SUB ESP,nbytes
        : modregrm(3,0,SP);     // ADD ESP,-nbytes
    const int amount = subtract ? nbytes : -nbytes;

    const uint grex = I64 ? REX_W << 16 : 0;
    cdb.genc2(0x81, grex | rm, amount);
}
525 
/**********************************
 * Generate code to align the stack pointer at `nbytes`
 * by emitting AND ESP,-nbytes.
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to align stack pointer
 */
void cod3_stackalign(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackalign(%d)\n", nbytes);
    const andSp = modregrm(3, 4, SP);          // AND ESP,imm
    const rexW = I64 ? REX_W << 16 : 0;
    cdb.genc2(0x81, rexW | andSp, -nbytes);
}
539 
static if (ELFOBJ)
{
/* Constructor that links the ModuleReference to the head of
 * the list pointed to by _Dmodule_ref.
 * Writes the instruction bytes to `buf` and records the relocations for
 * the two address operands in the CODE segment.
 * Params:
 *      buf = receives the generated instruction bytes
 *      codeOffset = code offset of the first generated byte; advanced
 *                   locally to position each relocation
 *      refOffset = offset of the ModuleReference, passed as the value of
 *                  the relocation against symbol 3 (STI_DATA)
 */
void cod3_buildmodulector(Outbuffer* buf, int codeOffset, int refOffset)
{
    /* Generated code:
     *      mov     EAX,&ModuleReference
     *      mov     ECX,_Dmodule_ref
     *      mov     EDX,[ECX]
     *      mov     [EAX],EDX
     *      mov     [ECX],EAX
     *      ret
     * For 64 bit PIC code the first two instructions are RIP relative instead.
     */

    const int seg = CODE;
    const bool pic64 = I64 && (config.flags3 & CFG3pic);

    if (!pic64)
    {
        const uint reltype = I64 ? R_X86_64_32 : R_386_32;

        /* movl ModuleReference*, %eax */
        buf.writeByte(0xB8);
        codeOffset += 1;
        codeOffset += Obj.writerel(seg, codeOffset, reltype, 3 /*STI_DATA*/, refOffset);

        /* movl _Dmodule_ref, %ecx */
        buf.writeByte(0xB9);
        codeOffset += 1;
        codeOffset += Obj.writerel(seg, codeOffset, reltype, Obj.external_def("_Dmodule_ref"), 0);
    }
    else
    {
        // LEA RAX,ModuleReference[RIP]
        buf.writeByte(REX | REX_W);
        buf.writeByte(LEA);
        buf.writeByte(modregrm(0,AX,5));
        codeOffset += 3;
        codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_PC32, 3 /*STI_DATA*/, refOffset - 4);

        // MOV RCX,_Dmodule_ref@GOTPCREL[RIP]
        buf.writeByte(REX | REX_W);
        buf.writeByte(0x8B);
        buf.writeByte(modregrm(0,CX,5));
        codeOffset += 3;
        codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_GOTPCREL, Obj.external_def("_Dmodule_ref"), -4);
    }

    // The three moves that insert the ModuleReference at the list head;
    // each gets a REX.W prefix for 64 bit code
    static immutable ubyte[2][3] linkInsns =
    [
        [0x8B, 0x11],   /* movl (%ecx), %edx */
        [0x89, 0x10],   /* movl %edx, (%eax) */
        [0x89, 0x01],   /* movl %eax, (%ecx) */
    ];
    foreach (ref insn; linkInsns)
    {
        if (I64)
            buf.writeByte(REX | REX_W);
        buf.writeByte(insn[0]);
        buf.writeByte(insn[1]);
    }

    buf.writeByte(0xC3); /* ret */
}

}
604 
605 
/*****************************
 * Given a type, return a mask of
 * registers to hold that type.
 * Params:
 *      tym = type of the value
 *      tyf = function type; only consulted for TYcfloat in 32 bit code on
 *            the Linux/OSX/FreeBSD/OpenBSD/DragonFlyBSD/Solaris targets
 * Returns:
 *      register mask; 0 for TYvoid, TYstruct and TYarray
 */

regm_t regmask(tym_t tym, tym_t tyf)
{
    switch (tybasic(tym))
    {
        // No registers
        case TYvoid:
        case TYstruct:
        case TYarray:
            return 0;

        // Always AX
        case TYbool:
        case TYwchar_t:
        case TYchar16:
        case TYchar:
        case TYschar:
        case TYuchar:
        case TYshort:
        case TYushort:
        case TYint:
        case TYuint:
        case TYnullptr:
        case TYnptr:
        case TYnref:
        case TYsptr:
        case TYcptr:
        case TYimmutPtr:
        case TYsharePtr:
        case TYrestrictPtr:
        case TYfgPtr:
            return mAX;

        case TYfloat:
        case TYifloat:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            // otherwise same registers as TYlong
            if (I16)
                return mDX | mAX;
            return mAX;

        case TYlong:
        case TYulong:
        case TYdchar:
            // register pair only for 16 bit code
            if (I16)
                return mDX | mAX;
            return mAX;

        case TYfptr:
        case TYhptr:
            return mDX | mAX;

        case TYcent:
        case TYucent:
            assert(I64);
            return mDX | mAX;

        case TYvptr:
            return mDX | mBX;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            return DOUBLEREGS;

        case TYllong:
        case TYullong:
            if (I64)
                return mAX;
            if (I32)
                return mDX | mAX;
            return DOUBLEREGS;

        case TYldouble:
        case TYildouble:
            return mST0;

        case TYcfloat:
static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
{
            if (I32 && tybasic(tyf) == TYnfunc)
                return mDX | mAX;
}
            // otherwise same registers as TYcdouble
            goto case TYcdouble;

        case TYcdouble:
            if (I64)
                return mXMM0 | mXMM1;
            return mST01;

        case TYcldouble:
            return mST01;

        // SIMD vector types
        case TYfloat4:
        case TYdouble2:
        case TYschar16:
        case TYuchar16:
        case TYshort8:
        case TYushort8:
        case TYlong4:
        case TYulong4:
        case TYllong2:
        case TYullong2:

        case TYfloat8:
        case TYdouble4:
        case TYschar32:
        case TYuchar32:
        case TYshort16:
        case TYushort16:
        case TYlong8:
        case TYulong8:
        case TYllong4:
        case TYullong4:
            if (config.fpxmmregs)
                return mXMM0;
            printf("SIMD operations not supported on this platform\n");
            exit(1);
            return mXMM0;

        default:
            debug WRTYxx(tym);
            assert(0);
    }
}
736 
/*******************************
 * setup register allocator parameters with platform specific data
 * Params:
 *      dst_integer_reg = set to AX
 *      dst_float_reg = set to XMM0
 */
void cgreg_dst_regs(reg_t* dst_integer_reg, reg_t* dst_float_reg)
{
    *dst_float_reg   = XMM0;
    *dst_integer_reg = AX;
}
745 
/*******************************
 * Select the order in which registers are tried for a variable of type `ty`.
 * Every sequence is terminated by NOREG.
 * Params:
 *      ty = type of the variable
 *      pseq = set to the register sequence for the type
 *      pseqmsw = set only when `ty` is twice REGSIZE in 32 or 64 bit code:
 *                the second sequence of the register pair, with `pseq`
 *                being the first (named lsw/msw in the tables below);
 *                left untouched in all other cases
 */
void cgreg_set_priorities(tym_t ty, const(reg_t)** pseq, const(reg_t)** pseqmsw)
{
    const sz = tysize(ty);

    if (tyxmmreg(ty))
    {
        static immutable ubyte[9] sequence = [XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,NOREG];
        *pseq = sequence.ptr;
    }
    else if (I64)
    {
        if (sz == REGSIZE * 2)
        {
            static immutable ubyte[3] seqmsw1 = [CX,DX,NOREG];
            static immutable ubyte[5] seqlsw1 = [AX,BX,SI,DI,NOREG];
            *pseq = seqlsw1.ptr;
            *pseqmsw = seqmsw1.ptr;
        }
        else
        {   // R10 is reserved for the static link
            static immutable ubyte[15] sequence2 = [AX,CX,DX,SI,DI,R8,R9,R11,BX,R12,R13,R14,R15,BP,NOREG];
            // immutable converts implicitly to const, as in every other
            // branch, so no cast is needed
            *pseq = sequence2.ptr;
        }
    }
    else if (I32)
    {
        if (sz == REGSIZE * 2)
        {
            static immutable ubyte[5] seqlsw3 = [AX,BX,SI,DI,NOREG];
            static immutable ubyte[3] seqmsw3 = [CX,DX,NOREG];
            *pseq = seqlsw3.ptr;
            *pseqmsw = seqmsw3.ptr;
        }
        else
        {
            static immutable ubyte[8] sequence4 = [AX,CX,DX,BX,SI,DI,BP,NOREG];
            *pseq = sequence4.ptr;
        }
    }
    else
    {   assert(I16);
        if (typtr(ty))
        {
            // For pointer types, try to pick index register first
            static immutable ubyte[8] seqidx5 = [BX,SI,DI,AX,CX,DX,BP,NOREG];
            *pseq = seqidx5.ptr;
        }
        else
        {
            // Otherwise, try to pick index registers last
            static immutable ubyte[8] sequence6 = [AX,CX,DX,BX,SI,DI,BP,NOREG];
            *pseq = sequence6.ptr;
        }
    }
}
801 
/*******************************************
 * Generate a call to a finally block.
 * The registers in `retregs` are saved before and restored after the call.
 * When STACKALIGN is 16 or more the stack pointer is adjusted around the
 * call so the total pushed, including one extra REGSIZE, is a multiple
 * of STACKALIGN.
 * Params:
 *      bf = block to call
 *      retregs = registers to preserve across call
 * Returns:
 *      code generated
 */
private code *callFinallyBlock(block *bf, regm_t retregs)
{
    CodeBuilder cdbs; cdbs.ctor();      // save code, call, then everything else
    CodeBuilder cdbr; cdbr.ctor();      // restore code, appended last

    calledFinally = true;
    uint npush = gensaverestore(retregs,cdbs,cdbr);

    int nalign = 0;                     // extra stack adjustment, 0 if none
    if (STACKALIGN >= 16)
    {
        // NOTE(review): the extra REGSIZE presumably accounts for the
        // return address pushed by the CALL - confirm
        npush += REGSIZE;

        const misalign = npush & (STACKALIGN - 1);
        if (misalign)
        {
            nalign = STACKALIGN - misalign;
            cod3_stackadj(cdbs, nalign);
        }
    }

    // CALL bf
    cdbs.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf);
    regcon.immed.mval = 0;

    // Undo the alignment adjustment
    if (nalign)
        cod3_stackadj(cdbs, -nalign);

    cdbs.append(cdbr);
    return cdbs.finish();
}
833 
834 /*******************************
835  * Generate block exit code
836  */
837 void outblkexitcode(ref CodeBuilder cdb, block *bl, ref int anyspill, const(char)* sflsave, Symbol** retsym, const regm_t mfuncregsave)
838 {
839     CodeBuilder cdb2; cdb2.ctor();
840     elem *e = bl.Belem;
841     block *nextb;
842     regm_t retregs = 0;
843 
844     if (bl.BC != BCasm)
845         assert(bl.Bcode == null);
846 
847     switch (bl.BC)                     /* block exit condition         */
848     {
849         case BCiftrue:
850         {
851             bool jcond = true;
852             block *bs1 = bl.nthSucc(0);
853             block *bs2 = bl.nthSucc(1);
854             if (bs1 == bl.Bnext)
855             {   // Swap bs1 and bs2
856                 block *btmp;
857 
858                 jcond ^= 1;
859                 btmp = bs1;
860                 bs1 = bs2;
861                 bs2 = btmp;
862             }
863             logexp(cdb,e,jcond,FLblock,cast(code *) bs1);
864             nextb = bs2;
865         }
866         L5:
867             if (configv.addlinenumbers && bl.Bsrcpos.Slinnum &&
868                 !(funcsym_p.ty() & mTYnaked))
869             {
870                 //printf("BCiftrue: %s(%u)\n", bl.Bsrcpos.Sfilename ? bl.Bsrcpos.Sfilename : "", bl.Bsrcpos.Slinnum);
871                 cdb.genlinnum(bl.Bsrcpos);
872             }
873             if (nextb != bl.Bnext)
874             {
875                 assert(!(bl.Bflags & BFLepilog));
876                 genjmp(cdb,JMP,FLblock,nextb);
877             }
878             break;
879 
880         case BCjmptab:
881         case BCifthen:
882         case BCswitch:
883         {
884             assert(!(bl.Bflags & BFLepilog));
885             doswitch(cdb,bl);               // hide messy details
886             break;
887         }
888 version (MARS)
889 {
890         case BCjcatch:          // D catch clause of try-catch
891             assert(ehmethod(funcsym_p) != EHmethod.EH_NONE);
892             // Mark all registers as destroyed. This will prevent
893             // register assignments to variables used in catch blocks.
894             getregs(cdb,lpadregs());
895 
896             if (config.ehmethod == EHmethod.EH_DWARF)
897             {
898                 /* Each block must have ESP set to the same value it was at the end
899                  * of the prolog. But the unwinder calls catch blocks with ESP set
900                  * at the value it was when the throwing function was called, which
901                  * may have arguments pushed on the stack.
902                  * This instruction will reset ESP to the correct offset from EBP.
903                  */
904                 cdb.gen1(ESCAPE | ESCfixesp);
905             }
906             goto case_goto;
907 }
908 version (SCPP)
909 {
910         case BCcatch:           // C++ catch clause of try-catch
911             // Mark all registers as destroyed. This will prevent
912             // register assignments to variables used in catch blocks.
913             getregs(cdb,allregs | mES);
914             goto case_goto;
915 
916         case BCtry:
917             usednteh |= EHtry;
918             if (config.exe == EX_WIN32)
919                 usednteh |= NTEHtry;
920             goto case_goto;
921 }
922         case BCgoto:
923             nextb = bl.nthSucc(0);
924             if ((MARS ||
925                  funcsym_p.Sfunc.Fflags3 & Fnteh) &&
926                 ehmethod(funcsym_p) != EHmethod.EH_DWARF &&
927                 bl.Btry != nextb.Btry &&
928                 nextb.BC != BC_finally)
929             {
930                 regm_t retregsx = 0;
931                 gencodelem(cdb,e,&retregsx,true);
932                 int toindex = nextb.Btry ? nextb.Btry.Bscope_index : -1;
933                 assert(bl.Btry);
934                 int fromindex = bl.Btry.Bscope_index;
935 version (MARS)
936 {
937                 if (toindex + 1 == fromindex)
938                 {   // Simply call __finally
939                     if (bl.Btry &&
940                         bl.Btry.nthSucc(1).BC == BCjcatch)
941                     {
942                         goto L5;        // it's a try-catch, not a try-finally
943                     }
944                 }
945 }
946                 if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) ||
947                     config.ehmethod == EHmethod.EH_SEH)
948                 {
949                     nteh_unwind(cdb,0,toindex);
950                 }
951                 else
952                 {
953 version (MARS)
954 {
955                 if (toindex + 1 <= fromindex)
956                 {
957                     //c = cat(c, linux_unwind(0, toindex));
958                     block *bt;
959 
960                     //printf("B%d: fromindex = %d, toindex = %d\n", bl.Bdfoidx, fromindex, toindex);
961                     bt = bl;
962                     while ((bt = bt.Btry) != null && bt.Bscope_index != toindex)
963                     {   block *bf;
964 
965                         //printf("\tbt.Bscope_index = %d, bt.Blast_index = %d\n", bt.Bscope_index, bt.Blast_index);
966                         bf = bt.nthSucc(1);
967                         // Only look at try-finally blocks
968                         if (bf.BC == BCjcatch)
969                             continue;
970 
971                         if (bf == nextb)
972                             continue;
973                         //printf("\tbf = B%d, nextb = B%d\n", bf.Bdfoidx, nextb.Bdfoidx);
974                         if (nextb.BC == BCgoto &&
975                             !nextb.Belem &&
976                             bf == nextb.nthSucc(0))
977                             continue;
978 
979                         // call __finally
980                         cdb.append(callFinallyBlock(bf.nthSucc(0), retregsx));
981                     }
982                 }
983 }
984                 }
985                 goto L5;
986             }
987         case_goto:
988         {
989             regm_t retregsx = 0;
990             gencodelem(cdb,e,&retregsx,true);
991             if (anyspill)
992             {   // Add in the epilog code
993                 CodeBuilder cdbstore; cdbstore.ctor();
994                 CodeBuilder cdbload;  cdbload.ctor();
995 
996                 for (int i = 0; i < anyspill; i++)
997                 {   Symbol *s = globsym.tab[i];
998 
999                     if (s.Sflags & SFLspill &&
1000                         vec_testbit(dfoidx,s.Srange))
1001                     {
1002                         s.Sfl = sflsave[i];    // undo block register assignments
1003                         cgreg_spillreg_epilog(bl,s,cdbstore,cdbload);
1004                     }
1005                 }
1006                 cdb.append(cdbstore);
1007                 cdb.append(cdbload);
1008             }
1009             nextb = bl.nthSucc(0);
1010             goto L5;
1011         }
1012 
1013         case BC_try:
1014             if (config.ehmethod == EHmethod.EH_NONE || funcsym_p.Sfunc.Fflags3 & Feh_none)
1015             {
1016                 /* Need to use frame pointer to access locals, not the stack pointer,
1017                  * because we'll be calling the BC_finally blocks and the stack will be off.
1018                  */
1019                 needframe = 1;
1020             }
1021             else if (config.ehmethod == EHmethod.EH_SEH || config.ehmethod == EHmethod.EH_WIN32)
1022             {
1023                 usednteh |= NTEH_try;
1024                 nteh_usevars();
1025             }
1026             else
1027                 usednteh |= EHtry;
1028             goto case_goto;
1029 
1030         case BC_finally:
1031             if (ehmethod(funcsym_p) == EHmethod.EH_DWARF)
1032             {
1033                 // Mark scratch registers as destroyed.
1034                 getregsNoSave(lpadregs());
1035 
1036                 regm_t retregsx = 0;
1037                 gencodelem(cdb,bl.Belem,&retregsx,true);
1038 
1039                 // JMP bl.nthSucc(1)
1040                 nextb = bl.nthSucc(1);
1041 
1042                 goto L5;
1043             }
1044             else
1045             {
1046                 if (config.ehmethod == EHmethod.EH_SEH ||
1047                     config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none))
1048                 {
1049                     // Mark all registers as destroyed. This will prevent
1050                     // register assignments to variables used in finally blocks.
1051                     getregsNoSave(lpadregs());
1052                 }
1053 
1054                 assert(!e);
1055                 // Generate CALL to finalizer code
1056                 cdb.append(callFinallyBlock(bl.nthSucc(0), 0));
1057 
1058                 // JMP bl.nthSucc(1)
1059                 nextb = bl.nthSucc(1);
1060 
1061                 goto L5;
1062             }
1063 
1064         case BC_lpad:
1065         {
1066             assert(ehmethod(funcsym_p) == EHmethod.EH_DWARF);
1067             // Mark all registers as destroyed. This will prevent
1068             // register assignments to variables used in finally blocks.
1069             getregsNoSave(lpadregs());
1070 
1071             regm_t retregsx = 0;
1072             gencodelem(cdb,bl.Belem,&retregsx,true);
1073 
1074             // JMP bl.nthSucc(0)
1075             nextb = bl.nthSucc(0);
1076             goto L5;
1077         }
1078 
1079         case BC_ret:
1080         {
1081             regm_t retregsx = 0;
1082             gencodelem(cdb,e,&retregsx,true);
1083             if (ehmethod(funcsym_p) == EHmethod.EH_DWARF)
1084             {
1085             }
1086             else
1087                 cdb.gen1(0xC3);   // RET
1088             break;
1089         }
1090 
1091 static if (NTEXCEPTIONS)
1092 {
1093         case BC_except:
1094         {
1095             assert(!e);
1096             usednteh |= NTEH_except;
1097             nteh_setsp(cdb,0x8B);
1098             getregsNoSave(allregs);
1099             nextb = bl.nthSucc(0);
1100             goto L5;
1101         }
1102         case BC_filter:
1103         {
1104             nteh_filter(cdb, bl);
1105             // Mark all registers as destroyed. This will prevent
1106             // register assignments to variables used in filter blocks.
1107             getregsNoSave(allregs);
1108             regm_t retregsx = regmask(e.Ety, TYnfunc);
1109             gencodelem(cdb,e,&retregsx,true);
1110             cdb.gen1(0xC3);   // RET
1111             break;
1112         }
1113 }
1114 
1115         case BCretexp:
1116             retregs = regmask(e.Ety, funcsym_p.ty());
1117 
1118             // For the final load into the return regs, don't set regcon.used,
1119             // so that the optimizer can potentially use retregs for register
1120             // variable assignments.
1121 
1122             if (config.flags4 & CFG4optimized)
1123             {   regm_t usedsave;
1124 
1125                 docommas(cdb,&e);
1126                 usedsave = regcon.used;
1127                 if (!OTleaf(e.Eoper))
1128                     gencodelem(cdb,e,&retregs,true);
1129                 else
1130                 {
1131                     if (e.Eoper == OPconst)
1132                         regcon.mvar = 0;
1133                     gencodelem(cdb,e,&retregs,true);
1134                     regcon.used = usedsave;
1135                     if (e.Eoper == OPvar)
1136                     {   Symbol *s = e.EV.Vsym;
1137 
1138                         if (s.Sfl == FLreg && s.Sregm != mAX)
1139                             *retsym = s;
1140                     }
1141                 }
1142             }
1143             else
1144             {
1145                 gencodelem(cdb,e,&retregs,true);
1146             }
1147             goto L4;
1148 
1149         case BCret:
1150         case BCexit:
1151             retregs = 0;
1152             gencodelem(cdb,e,&retregs,true);
1153         L4:
1154             if (retregs == mST0)
1155             {   assert(global87.stackused == 1);
1156                 pop87();                // account for return value
1157             }
1158             else if (retregs == mST01)
1159             {   assert(global87.stackused == 2);
1160                 pop87();
1161                 pop87();                // account for return value
1162             }
1163 
1164             if (bl.BC == BCexit)
1165             {
1166                 if (config.flags4 & CFG4optimized)
1167                     mfuncreg = mfuncregsave;
1168             }
1169             else if (MARS || usednteh & NTEH_try)
1170             {
1171                 block *bt = bl;
1172                 while ((bt = bt.Btry) != null)
1173                 {
1174                     block *bf = bt.nthSucc(1);
1175 version (MARS)
1176 {
1177                     // Only look at try-finally blocks
1178                     if (bf.BC == BCjcatch)
1179                     {
1180                         continue;
1181                     }
1182 }
1183                     if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) ||
1184                         config.ehmethod == EHmethod.EH_SEH)
1185                     {
1186                         if (bt.Bscope_index == 0)
1187                         {
1188                             // call __finally
1189                             CodeBuilder cdbs; cdbs.ctor();
1190                             CodeBuilder cdbr; cdbr.ctor();
1191 
1192                             nteh_gensindex(cdb,-1);
1193                             gensaverestore(retregs,cdbs,cdbr);
1194                             cdb.append(cdbs);
1195                             cdb.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf.nthSucc(0));
1196                             regcon.immed.mval = 0;
1197                             cdb.append(cdbr);
1198                         }
1199                         else
1200                         {
1201                             nteh_unwind(cdb,retregs,~0);
1202                         }
1203                         break;
1204                     }
1205                     else
1206                     {
1207                         // call __finally
1208                         cdb.append(callFinallyBlock(bf.nthSucc(0), retregs));
1209                     }
1210                 }
1211             }
1212             break;
1213 
1214         case BCasm:
1215         {
1216             assert(!e);
1217             // Mark destroyed registers
1218             CodeBuilder cdbx; cdbx.ctor();
1219             getregs(cdbx,iasm_regs(bl));         // mark destroyed registers
1220             code *c = cdbx.finish();
1221             if (bl.Bsucc)
1222             {   nextb = bl.nthSucc(0);
1223                 if (!bl.Bnext)
1224                 {
1225                     cdb.append(bl.Bcode);
1226                     cdb.append(c);
1227                     goto L5;
1228                 }
1229                 if (nextb != bl.Bnext &&
1230                     bl.Bnext &&
1231                     !(bl.Bnext.BC == BCgoto &&
1232                      !bl.Bnext.Belem &&
1233                      nextb == bl.Bnext.nthSucc(0)))
1234                 {
1235                     // See if already have JMP at end of block
1236                     code *cl = code_last(bl.Bcode);
1237                     if (!cl || cl.Iop != JMP)
1238                     {
1239                         cdb.append(bl.Bcode);
1240                         cdb.append(c);
1241                         goto L5;        // add JMP at end of block
1242                     }
1243                 }
1244             }
1245             cdb.append(bl.Bcode);
1246             break;
1247         }
1248 
1249         default:
1250             debug
1251             printf("bl.BC = %d\n",bl.BC);
1252             assert(0);
1253     }
1254 }
1255 
/***********************************************
 * Declarations needed to sort switch cases with the C runtime's qsort().
 */

// Signature of the comparison callback handed to qsort().
alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

extern (C)  // qsort cmp functions need to be "C"
{
/***********************************************
 * One switch case: the case value paired with the block to jump to
 * when the switch expression equals that value.
 */
struct CaseVal
{
    targ_ullong val;    // case value; this is the sort key
    block *target;      // block to branch to on a match

    /*********************
     * Ordering callback for qsort(): orders by the `val` field.
     * Params:
     *      p = pointer to the first CaseVal
     *      q = pointer to the second CaseVal
     * Returns:
     *      -1 if p.val < q.val, 0 if they are equal, 1 otherwise
     */
    extern (C) static nothrow int cmp(scope const(void*) p, scope const(void*) q)
    {
        const targ_ullong lhs = (cast(const(CaseVal)*)p).val;
        const targ_ullong rhs = (cast(const(CaseVal)*)q).val;

        if (lhs < rhs)
            return -1;
        return (lhs == rhs) ? 0 : 1;
    }
}
}
1279 
/***
 * Generate comparison of [reg2,reg] with val.
 *
 * Three shapes of code are produced:
 *  - I64 with an 8 byte operand: a single 64 bit CMP, either against a
 *    32 bit immediate or, when val does not fit, against sreg after
 *    loading val into it;
 *  - a single register operand (reg2 is NOREG): one CMP against the immediate;
 *  - a register pair: CMP of the most significant half, then, only if that
 *    compared equal, CMP of the least significant half.
 *
 * Params:
 *      cdb  = sink for the generated code
 *      val  = constant to compare against
 *      sz   = size in bytes of the value being compared
 *      reg  = register holding the value (the least significant half for a pair)
 *      reg2 = register holding the most significant half, or NOREG
 *      sreg = scratch register for a 64 bit constant, or NOREG if none was allocated
 */
private void cmpval(ref CodeBuilder cdb, targ_llong val, uint sz, reg_t reg, reg_t reg2, reg_t sreg)
{
    const targ_size_t imm = cast(targ_size_t)val;

    if (I64 && sz == 8)
    {
        // A 64 bit compare never uses a register pair
        assert(reg2 == NOREG);

        if (val != cast(int)val)
        {
            // val is not a sign-extended 32 bit value, so it cannot be an
            // immediate operand; go through the scratch register instead
            assert(sreg != NOREG);
            movregconst(cdb,sreg,imm,64);               // MOV sreg,val64
            genregs(cdb,0x3B,reg,sreg);                 // CMP reg,sreg
            code_orrex(cdb.last(), REX_W);
            getregsNoSave(mask(sreg));                  // don't remember we loaded this constant
            return;
        }

        cdb.genc2(0x81,modregrmx(3,7,reg),imm);         // CMP reg,value32
        cdb.last().Irex |= REX_W;                       // 64 bit operand
        return;
    }

    if (reg2 == NOREG)
    {
        cdb.genc2(0x81,modregrmx(3,7,reg),imm);         // CMP reg,casevalue
        return;
    }

    // Register pair: the low half is only compared when the high half matched,
    // otherwise the flags from the high half compare are left in place.
    cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val));  // CMP reg2,MSREG(casevalue)
    code *skip = gennop(null);
    genjmp(cdb,JNE,FLcode,cast(block *) skip);          // JNE skip
    cdb.genc2(0x81,modregrm(3,7,reg),imm);              // CMP reg,casevalue
    cdb.append(skip);                                   // skip:
}
1313 
/***
 * Generate the compare-and-branch code that dispatches a switch value to
 * one of the targets in casevals[0 .. ncases].
 *
 * With fewer than 4 cases, or when not optimizing for speed, a straight
 * sequence of CMP/JE pairs is emitted. Otherwise the array is split around
 * its middle element and the function recurses on each half, producing a
 * binary search; this relies on the caller having sorted casevals.
 *
 * Params:
 *      cdb      = sink for the generated code
 *      casevals = array of case value / target block pairs
 *      ncases   = number of entries in casevals
 *      sz       = size in bytes of the switch value
 *      reg      = register holding the value (the least significant half for a pair)
 *      reg2     = register holding the most significant half, or NOREG
 *      sreg     = scratch register handed on to cmpval(), or NOREG
 *      bdefault = block to go to when no case matches
 *      last     = if true, end the sequence with a JMP to bdefault
 *                 (used when the default is not the next block)
 */
private void ifthen(ref CodeBuilder cdb, CaseVal *casevals, size_t ncases,
        uint sz, reg_t reg, reg_t reg2, reg_t sreg, block *bdefault, bool last)
{
    const bool binarySearch = ncases >= 4 && (config.flags4 & CFG4speed) != 0;

    if (!binarySearch)
    {
        // Not worth doing a binary search, just do a sequence of CMP/JE
        foreach (i; 0 .. ncases)
        {
            targ_llong caseval = casevals[i].val;
            cmpval(cdb, caseval, sz, reg, reg2, sreg);

            code *skip = null;
            if (reg2 != NOREG)
            {
                // Register pair: only take the JE below if both halves matched
                skip = gennop(null);
                genjmp(cdb,JNE,FLcode,cast(block *) skip);      // JNE skip
                cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(caseval));   // CMP reg2,MSREG(casevalue)
            }
            genjmp(cdb,JE,FLblock,casevals[i].target);          // JE caseaddr
            cdb.append(skip);                                   // skip:
        }

        if (last)       // if default is not next block
            genjmp(cdb,JMP,FLblock,bdefault);
        return;
    }

    const size_t mid = ncases / 2;

    // Compares for casevals[0..mid]; always ends with a jump to the default
    // because the code for the upper half is placed right after it
    CodeBuilder cdbLower; cdbLower.ctor();
    ifthen(cdbLower, casevals, mid, sz, reg, reg2, sreg, bdefault, true);

    // Compares for casevals[mid+1..ncases]
    CodeBuilder cdbUpper; cdbUpper.ctor();
    ifthen(cdbUpper, casevals + mid + 1, ncases - mid - 1, sz, reg, reg2, sreg, bdefault, last);
    code *upperLabel = gennop(null);

    // Compare for casevals[mid]
    cmpval(cdb, casevals[mid].val, sz, reg, reg2, sreg);
    genjmp(cdb,JE,FLblock,casevals[mid].target);        // JE target
    // Unsigned jump here, as the cases were sorted using unsigned comparisons
    genjmp(cdb,JA,FLcode,cast(block *) upperLabel);     // JA upperLabel

    // Layout: pivot test, lower half, upperLabel:, upper half
    cdb.append(cdbLower);
    cdb.append(upperLabel);
    cdb.append(cdbUpper);
}
1361 
1362 /*******************************
1363  * Generate code for blocks ending in a switch statement.
1364  * Take BCswitch and decide on
1365  *      BCifthen        use if - then code
1366  *      BCjmptab        index into jump table
1367  *      BCswitch        search table for match
1368  */
1369 
1370 void doswitch(ref CodeBuilder cdb, block *b)
1371 {
1372     targ_ulong msw;
1373 
1374     // If switch tables are in code segment and we need a CS: override to get at them
1375     bool csseg = cast(bool)(config.flags & CFGromable);
1376 
1377     //printf("doswitch(%d)\n", b.BC);
1378     elem *e = b.Belem;
1379     elem_debug(e);
1380     docommas(cdb,&e);
1381     cgstate.stackclean++;
1382     tym_t tys = tybasic(e.Ety);
1383     int sz = _tysize[tys];
1384     bool dword = (sz == 2 * REGSIZE);
1385     bool mswsame = true;                // assume all msw's are the same
1386     targ_llong *p = b.Bswitch;          // pointer to case data
1387     assert(p);
1388     uint ncases = cast(uint)*p++;       // number of cases
1389 
1390     targ_llong vmax = MINLL;            // smallest possible llong
1391     targ_llong vmin = MAXLL;            // largest possible llong
1392     for (uint n = 0; n < ncases; n++)   // find max and min case values
1393     {
1394         targ_llong val = *p++;
1395         if (val > vmax) vmax = val;
1396         if (val < vmin) vmin = val;
1397         if (REGSIZE == 2)
1398         {
1399             ushort ms = (val >> 16) & 0xFFFF;
1400             if (n == 0)
1401                 msw = ms;
1402             else if (msw != ms)
1403                 mswsame = 0;
1404         }
1405         else // REGSIZE == 4
1406         {
1407             targ_ulong ms = (val >> 32) & 0xFFFFFFFF;
1408             if (n == 0)
1409                 msw = ms;
1410             else if (msw != ms)
1411                 mswsame = 0;
1412         }
1413     }
1414     p -= ncases;
1415     //dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin);
1416 
1417     /* Three kinds of switch strategies - pick one
1418      */
1419     if (ncases <= 3)
1420         goto Lifthen;
1421     else if (I16 && cast(targ_ullong)(vmax - vmin) <= ncases * 2)
1422         goto Ljmptab;           // >=50% of the table is case values, rest is default
1423     else if (cast(targ_ullong)(vmax - vmin) <= ncases * 3)
1424         goto Ljmptab;           // >= 33% of the table is case values, rest is default
1425     else if (I16)
1426         goto Lswitch;
1427     else
1428         goto Lifthen;
1429 
1430     /*************************************************************************/
1431     {   // generate if-then sequence
1432     Lifthen:
1433         regm_t retregs = ALLREGS;
1434         b.BC = BCifthen;
1435         scodelem(cdb,e,&retregs,0,true);
1436         reg_t reg, reg2;
1437         if (dword)
1438         {   reg = findreglsw(retregs);
1439             reg2 = findregmsw(retregs);
1440         }
1441         else
1442         {
1443             reg = findreg(retregs);     // reg that result is in
1444             reg2 = NOREG;
1445         }
1446         list_t bl = b.Bsucc;
1447         block *bdefault = b.nthSucc(0);
1448         if (dword && mswsame)
1449         {
1450             cdb.genc2(0x81,modregrm(3,7,reg2),msw);   // CMP reg2,MSW
1451             genjmp(cdb,JNE,FLblock,bdefault);  // JNE default
1452             reg2 = NOREG;
1453         }
1454 
1455         reg_t sreg = NOREG;                          // may need a scratch register
1456 
1457         // Put into casevals[0..ncases] so we can sort then slice
1458         CaseVal *casevals = cast(CaseVal *)malloc(ncases * CaseVal.sizeof);
1459         assert(casevals);
1460         for (uint n = 0; n < ncases; n++)
1461         {
1462             casevals[n].val = p[n];
1463             bl = list_next(bl);
1464             casevals[n].target = list_block(bl);
1465 
1466             // See if we need a scratch register
1467             if (sreg == NOREG && I64 && sz == 8 && p[n] != cast(int)p[n])
1468             {   regm_t regm = ALLREGS & ~mask(reg);
1469                 allocreg(cdb,&regm, &sreg, TYint);
1470             }
1471         }
1472 
1473         // Sort cases so we can do a runtime binary search
1474         qsort(casevals, ncases, CaseVal.sizeof, &CaseVal.cmp);
1475 
1476         //for (uint n = 0; n < ncases; n++)
1477             //printf("casevals[%lld] = x%x\n", n, casevals[n].val);
1478 
1479         // Generate binary tree of comparisons
1480         ifthen(cdb, casevals, ncases, sz, reg, reg2, sreg, bdefault, bdefault != b.Bnext);
1481 
1482         free(casevals);
1483 
1484         cgstate.stackclean--;
1485         return;
1486     }
1487 
1488     /*************************************************************************/
1489     {
1490         // Use switch value to index into jump table
1491     Ljmptab:
1492         //printf("Ljmptab:\n");
1493 
1494         b.BC = BCjmptab;
1495 
1496         /* If vmin is small enough, we can just set it to 0 and the jump
1497          * table entries from 0..vmin-1 can be set with the default target.
1498          * This saves the SUB instruction.
1499          * Must be same computation as used in outjmptab().
1500          */
1501         if (vmin > 0 && vmin <= _tysize[TYint])
1502             vmin = 0;
1503 
1504         b.Btablesize = cast(int) (vmax - vmin + 1) * tysize(TYnptr);
1505         regm_t retregs = IDXREGS;
1506         if (dword)
1507             retregs |= mMSW;
1508 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
1509 {
1510         if (I32 && config.flags3 & CFG3pic)
1511             retregs &= ~mBX;                            // need EBX for GOT
1512 }
1513         bool modify = (I16 || I64 || vmin);
1514         scodelem(cdb,e,&retregs,0,!modify);
1515         reg_t reg = findreg(retregs & IDXREGS); // reg that result is in
1516         reg_t reg2;
1517         if (dword)
1518             reg2 = findregmsw(retregs);
1519         if (modify)
1520         {
1521             assert(!(retregs & regcon.mvar));
1522             getregs(cdb,retregs);
1523         }
1524         if (vmin)                       // if there is a minimum
1525         {
1526             cdb.genc2(0x81,modregrm(3,5,reg),cast(targ_size_t)vmin); // SUB reg,vmin
1527             if (dword)
1528             {   cdb.genc2(0x81,modregrm(3,3,reg2),cast(targ_size_t)MSREG(vmin)); // SBB reg2,vmin
1529                 genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
1530             }
1531         }
1532         else if (dword)
1533         {   gentstreg(cdb,reg2);              // TEST reg2,reg2
1534             genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
1535         }
1536         if (vmax - vmin != REGMASK)     // if there is a maximum
1537         {                               // CMP reg,vmax-vmin
1538             cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)(vmax-vmin));
1539             if (I64 && sz == 8)
1540                 code_orrex(cdb.last(), REX_W);
1541             genjmp(cdb,JA,FLblock,b.nthSucc(0));  // JA default
1542         }
1543         if (I64)
1544         {
1545             if (!vmin)
1546             {   // Need to clear out high 32 bits of reg
1547                 // Use 8B instead of 89, as 89 will be optimized away as a NOP
1548                 genregs(cdb,0x8B,reg,reg);                 // MOV reg,reg
1549             }
1550             if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
1551             {
1552                 /* LEA    R1,disp[RIP]          48 8D 05 00 00 00 00
1553                  * MOVSXD R2,[reg*4][R1]        48 63 14 B8
1554                  * LEA    R1,[R1][R2]           48 8D 04 02
1555                  * JMP    R1                    FF E0
1556                  */
1557                 reg_t r1;
1558                 regm_t scratchm = ALLREGS & ~mask(reg);
1559                 allocreg(cdb,&scratchm,&r1,TYint);
1560                 reg_t r2;
1561                 scratchm = ALLREGS & ~(mask(reg) | mask(r1));
1562                 allocreg(cdb,&scratchm,&r2,TYint);
1563 
1564                 CodeBuilder cdbe; cdbe.ctor();
1565                 cdbe.genc1(LEA,(REX_W << 16) | modregxrm(0,r1,5),FLswitch,0);        // LEA R1,disp[RIP]
1566                 cdbe.last().IEV1.Vswitch = b;
1567                 cdbe.gen2sib(0x63,(REX_W << 16) | modregxrm(0,r2,4), modregxrmx(2,reg,r1)); // MOVSXD R2,[reg*4][R1]
1568                 cdbe.gen2sib(LEA,(REX_W << 16) | modregxrm(0,r1,4),modregxrmx(0,r1,r2));    // LEA R1,[R1][R2]
1569                 cdbe.gen2(0xFF,modregrmx(3,4,r1));                                          // JMP R1
1570 
1571                 b.Btablesize = cast(int) (vmax - vmin + 1) * 4;
1572                 code *ce = cdbe.finish();
1573                 pinholeopt(ce, null);
1574 
1575                 cdb.append(cdbe);
1576             }
1577             else
1578             {
1579                 cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);   // JMP disp[reg*8]
1580                 cdb.last().IEV1.Vswitch = b;
1581                 cdb.last().Isib = modregrm(3,reg & 7,5);
1582                 if (reg & 8)
1583                     cdb.last().Irex |= REX_X;
1584             }
1585         }
1586         else if (I32)
1587         {
1588 static if (JMPJMPTABLE)
1589 {
1590             /* LEA jreg,offset ctable[reg][reg * 4]
1591                JMP jreg
1592               ctable:
1593                JMP case0
1594                JMP case1
1595                ...
1596              */
1597             CodeBuilder ctable; ctable.ctor();
1598             block *bdef = b.nthSucc(0);
1599             targ_llong u;
1600             for (u = vmin; ; u++)
1601             {   block *targ = bdef;
1602                 for (n = 0; n < ncases; n++)
1603                 {
1604                     if (p[n] == u)
1605                     {   targ = b.nthSucc(n + 1);
1606                         break;
1607                     }
1608                 }
1609                 genjmp(ctable,JMP,FLblock,targ);
1610                 ctable.last().Iflags |= CFjmp5;           // don't shrink these
1611                 if (u == vmax)
1612                     break;
1613             }
1614 
1615             // Allocate scratch register jreg
1616             regm_t scratchm = ALLREGS & ~mask(reg);
1617             uint jreg = AX;
1618             allocreg(cdb,&scratchm,&jreg,TYint);
1619 
1620             // LEA jreg, offset ctable[reg][reg*4]
1621             cdb.genc1(LEA,modregrm(2,jreg,4),FLcode,6);
1622             cdb.last().Isib = modregrm(2,reg,reg);
1623             cdb.gen2(0xFF,modregrm(3,4,jreg));      // JMP jreg
1624             cdb.append(ctable);
1625             b.Btablesize = 0;
1626             cgstate.stackclean--;
1627             return;
1628 }
1629 else static if (TARGET_OSX)
1630 {
1631             /*     CALL L1
1632              * L1: POP  R1
1633              *     ADD  R1,disp[reg*4][R1]
1634              *     JMP  R1
1635              */
1636             // Allocate scratch register r1
1637             regm_t scratchm = ALLREGS & ~mask(reg);
1638             reg_t r1;
1639             allocreg(cdb,&scratchm,&r1,TYint);
1640 
1641             cdb.genc2(CALL,0,0);                           //     CALL L1
1642             cdb.gen1(0x58 + r1);                           // L1: POP R1
1643             cdb.genc1(0x03,modregrm(2,r1,4),FLswitch,0);   // ADD R1,disp[reg*4][EBX]
1644             cdb.last().IEV1.Vswitch = b;
1645             cdb.last().Isib = modregrm(2,reg,r1);
1646             cdb.gen2(0xFF,modregrm(3,4,r1));               // JMP R1
1647 }
1648 else
1649 {
1650             if (config.flags3 & CFG3pic)
1651             {
1652                 /* MOV  R1,EBX
1653                  * SUB  R1,funcsym_p@GOTOFF[offset][reg*4][EBX]
1654                  * JMP  R1
1655                  */
1656 
1657                 // Load GOT in EBX
1658                 load_localgot(cdb);
1659 
1660                 // Allocate scratch register r1
1661                 regm_t scratchm = ALLREGS & ~(mask(reg) | mBX);
1662                 reg_t r1;
1663                 allocreg(cdb,&scratchm,&r1,TYint);
1664 
1665                 genmovreg(cdb,r1,BX);              // MOV R1,EBX
1666                 cdb.genc1(0x2B,modregxrm(2,r1,4),FLswitch,0);   // SUB R1,disp[reg*4][EBX]
1667                 cdb.last().IEV1.Vswitch = b;
1668                 cdb.last().Isib = modregrm(2,reg,BX);
1669                 cdb.gen2(0xFF,modregrmx(3,4,r1));               // JMP R1
1670             }
1671             else
1672             {
1673                 cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);     // JMP disp[idxreg*4]
1674                 cdb.last().IEV1.Vswitch = b;
1675                 cdb.last().Isib = modregrm(2,reg,5);
1676             }
1677 }
1678         }
1679         else if (I16)
1680         {
1681             cdb.gen2(0xD1,modregrm(3,4,reg));                   // SHL reg,1
1682             uint rm = getaddrmode(retregs) | modregrm(0,4,0);
1683             cdb.genc1(0xFF,rm,FLswitch,0);                  // JMP [CS:]disp[idxreg]
1684             cdb.last().IEV1.Vswitch = b;
1685             cdb.last().Iflags |= csseg ? CFcs : 0;                       // segment override
1686         }
1687         else
1688             assert(0);
1689         cgstate.stackclean--;
1690         return;
1691     }
1692 
1693     /*************************************************************************/
1694     {
1695         /* Scan a table of case values, and jump to corresponding address.
1696          * Since it relies on REPNE SCASW, it has really nothing to recommend it
1697          * over Lifthen for 32 and 64 bit code.
1698          * Note that it has not been tested with MACHOBJ (OSX).
1699          */
1700     Lswitch:
1701         regm_t retregs = mAX;                  // SCASW requires AX
1702         if (dword)
1703             retregs |= mDX;
1704         else if (ncases <= 6 || config.flags4 & CFG4speed)
1705             goto Lifthen;
1706         scodelem(cdb,e,&retregs,0,true);
1707         if (dword && mswsame)
1708         {   /* CMP DX,MSW       */
1709             cdb.genc2(0x81,modregrm(3,7,DX),msw);
1710             genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
1711         }
1712         getregs(cdb,mCX|mDI);
1713 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
1714 {
1715         if (config.flags3 & CFG3pic)
1716         {   // Add in GOT
1717             getregs(cdb,mDX);
1718             cdb.genc2(CALL,0,0);        //     CALL L1
1719             cdb.gen1(0x58 + DI);        // L1: POP EDI
1720 
1721                                         //     ADD EDI,_GLOBAL_OFFSET_TABLE_+3
1722             Symbol *gotsym = Obj.getGOTsym();
1723             cdb.gencs(0x81,modregrm(3,0,DI),FLextern,gotsym);
1724             cdb.last().Iflags = CFoff;
1725             cdb.last().IEV2.Voffset = 3;
1726 
1727             makeitextern(gotsym);
1728 
1729             genmovreg(cdb, DX, DI);    // MOV EDX, EDI
1730                                         // ADD EDI,offset of switch table
1731             cdb.gencs(0x81,modregrm(3,0,DI),FLswitch,null);
1732             cdb.last().IEV2.Vswitch = b;
1733         }
1734 }
1735         if (!(config.flags3 & CFG3pic))
1736         {
1737                                         // MOV DI,offset of switch table
1738             cdb.gencs(0xC7,modregrm(3,0,DI),FLswitch,null);
1739             cdb.last().IEV2.Vswitch = b;
1740         }
1741         movregconst(cdb,CX,ncases,0);    // MOV CX,ncases
1742 
1743         /* The switch table will be accessed through ES:DI.
1744          * Therefore, load ES with proper segment value.
1745          */
1746         if (config.flags3 & CFG3eseqds)
1747         {
1748             assert(!csseg);
1749             getregs(cdb,mCX);           // allocate CX
1750         }
1751         else
1752         {
1753             getregs(cdb,mES|mCX);       // allocate ES and CX
1754             cdb.gen1(csseg ? 0x0E : 0x1E);      // PUSH CS/DS
1755             cdb.gen1(0x07);                     // POP  ES
1756         }
1757 
1758         targ_size_t disp = (ncases - 1) * _tysize[TYint];  // displacement to jump table
1759         if (dword && !mswsame)
1760         {
1761 
1762             /* Build the following:
1763                 L1:     SCASW
1764                         JNE     L2
1765                         CMP     DX,[CS:]disp[DI]
1766                 L2:     LOOPNE  L1
1767              */
1768 
1769             const int mod = (disp > 127) ? 2 : 1;         // displacement size
1770             code *cloop = genc2(null,0xE0,0,-7 - mod - csseg);   // LOOPNE scasw
1771             cdb.gen1(0xAF);                                      // SCASW
1772             code_orflag(cdb.last(),CFtarg2);                     // target of jump
1773             genjmp(cdb,JNE,FLcode,cast(block *) cloop); // JNE loop
1774                                                                  // CMP DX,[CS:]disp[DI]
1775             cdb.genc1(0x39,modregrm(mod,DX,5),FLconst,disp);
1776             cdb.last().Iflags |= csseg ? CFcs : 0;              // possible seg override
1777             cdb.append(cloop);
1778             disp += ncases * _tysize[TYint];           // skip over msw table
1779         }
1780         else
1781         {
1782             cdb.gen1(0xF2);              // REPNE
1783             cdb.gen1(0xAF);              // SCASW
1784         }
1785         genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
1786         const int mod = (disp > 127) ? 2 : 1;     // 1 or 2 byte displacement
1787         if (csseg)
1788             cdb.gen1(SEGCS);            // table is in code segment
1789 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
1790 {
1791         if (config.flags3 & CFG3pic)
1792         {                               // ADD EDX,(ncases-1)*2[EDI]
1793             cdb.genc1(0x03,modregrm(mod,DX,7),FLconst,disp);
1794                                         // JMP EDX
1795             cdb.gen2(0xFF,modregrm(3,4,DX));
1796         }
1797 }
1798         if (!(config.flags3 & CFG3pic))
1799         {                               // JMP (ncases-1)*2[DI]
1800             cdb.genc1(0xFF,modregrm(mod,4,(I32 ? 7 : 5)),FLconst,disp);
1801             cdb.last().Iflags |= csseg ? CFcs : 0;
1802         }
1803         b.Btablesize = disp + _tysize[TYint] + ncases * tysize(TYnptr);
1804         //assert(b.Bcode);
1805         cgstate.stackclean--;
1806         return;
1807     }
1808 }
1809 
1810 /******************************
1811  * Output data block for a jump table (BCjmptab).
1812  * The 'holes' in the table get filled with the
1813  * default label.
1814  */
1815 
1816 void outjmptab(block *b)
1817 {
1818     if (JMPJMPTABLE && I32)
1819         return;
1820 
1821     targ_llong *p = b.Bswitch;               // pointer to case data
1822     size_t ncases = cast(size_t)*p++;        // number of cases
1823 
1824     /* Find vmin and vmax, the range of the table will be [vmin .. vmax + 1]
1825      * Must be same computation as used in doswitch().
1826      */
1827     targ_llong vmax = MINLL;                 // smallest possible llong
1828     targ_llong vmin = MAXLL;                 // largest possible llong
1829     for (size_t n = 0; n < ncases; n++)      // find min case value
1830     {   targ_llong val = p[n];
1831         if (val > vmax) vmax = val;
1832         if (val < vmin) vmin = val;
1833     }
1834     if (vmin > 0 && vmin <= _tysize[TYint])
1835         vmin = 0;
1836     assert(vmin <= vmax);
1837 
1838     /* Segment and offset into which the jump table will be emitted
1839      */
1840     int jmpseg = objmod.jmpTableSegment(funcsym_p);
1841     targ_size_t *poffset = &Offset(jmpseg);
1842 
1843     /* Align start of jump table
1844      */
1845     targ_size_t alignbytes = _align(0,*poffset) - *poffset;
1846     objmod.lidata(jmpseg,*poffset,alignbytes);
1847     assert(*poffset == b.Btableoffset);        // should match precomputed value
1848 
1849     Symbol *gotsym = null;
1850     targ_size_t def = b.nthSucc(0).Boffset;  // default address
1851     for (targ_llong u = vmin; ; u++)
1852     {   targ_size_t targ = def;                     // default
1853         for (size_t n = 0; n < ncases; n++)
1854         {       if (p[n] == u)
1855                 {       targ = b.nthSucc(cast(int)(n + 1)).Boffset;
1856                         break;
1857                 }
1858         }
1859 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
1860 {
1861         if (I64)
1862         {
1863             if (config.flags3 & CFG3pic)
1864             {
1865                 objmod.reftodatseg(jmpseg,*poffset,targ + (u - vmin) * 4,funcsym_p.Sseg,CFswitch);
1866                 *poffset += 4;
1867             }
1868             else
1869             {
1870                 objmod.reftodatseg(jmpseg,*poffset,targ,funcsym_p.Sxtrnnum,CFoffset64 | CFswitch);
1871                 *poffset += 8;
1872             }
1873         }
1874         else
1875         {
1876             if (config.flags3 & CFG3pic)
1877             {
1878                 assert(config.flags & CFGromable);
1879                 // Want a GOTPC fixup to _GLOBAL_OFFSET_TABLE_
1880                 if (!gotsym)
1881                     gotsym = Obj.getGOTsym();
1882                 objmod.reftoident(jmpseg,*poffset,gotsym,*poffset - targ,CFswitch);
1883             }
1884             else
1885                 objmod.reftocodeseg(jmpseg,*poffset,targ);
1886             *poffset += 4;
1887         }
1888 }
1889 else static if (TARGET_OSX)
1890 {
1891         targ_size_t val;
1892         if (I64)
1893             val = targ - b.Btableoffset;
1894         else
1895             val = targ - b.Btablebase;
1896         objmod.write_bytes(SegData[jmpseg],4,&val);
1897 }
1898 else static if (TARGET_WINDOS)
1899 {
1900         if (I64)
1901         {
1902             targ_size_t val = targ - b.Btableoffset;
1903             objmod.write_bytes(SegData[jmpseg],4,&val);
1904         }
1905         else
1906         {
1907             objmod.reftocodeseg(jmpseg,*poffset,targ);
1908             *poffset += tysize(TYnptr);
1909         }
1910 }
1911 else
1912         assert(0);
1913 
1914         if (u == vmax)                  // for case that (vmax == ~0)
1915             break;
1916     }
1917 }
1918 
1919 
1920 /******************************
1921  * Output data block for a switch table.
1922  * Two consecutive tables, the first is the case value table, the
1923  * second is the address table.
1924  */
1925 
1926 void outswitab(block *b)
1927 {
1928     //printf("outswitab()\n");
1929     targ_llong *p = b.Bswitch;        // pointer to case data
1930     uint ncases = cast(uint)*p++;     // number of cases
1931 
1932     const int seg = objmod.jmpTableSegment(funcsym_p);
1933     targ_size_t *poffset = &Offset(seg);
1934     targ_size_t offset = *poffset;
1935     targ_size_t alignbytes = _align(0,*poffset) - *poffset;
1936     objmod.lidata(seg,*poffset,alignbytes);  // any alignment bytes necessary
1937     assert(*poffset == offset + alignbytes);
1938 
1939     uint sz = _tysize[TYint];
1940     assert(SegData[seg].SDseg == seg);
1941     for (uint n = 0; n < ncases; n++)          // send out value table
1942     {
1943         //printf("\tcase %d, offset = x%x\n", n, *poffset);
1944         objmod.write_bytes(SegData[seg],sz,p);
1945         p++;
1946     }
1947     offset += alignbytes + sz * ncases;
1948     assert(*poffset == offset);
1949 
1950     if (b.Btablesize == ncases * (REGSIZE * 2 + tysize(TYnptr)))
1951     {
1952         // Send out MSW table
1953         p -= ncases;
1954         for (uint n = 0; n < ncases; n++)
1955         {
1956             targ_size_t val = cast(targ_size_t)MSREG(*p);
1957             p++;
1958             objmod.write_bytes(SegData[seg],REGSIZE,&val);
1959         }
1960         offset += REGSIZE * ncases;
1961         assert(*poffset == offset);
1962     }
1963 
1964     list_t bl = b.Bsucc;
1965     for (uint n = 0; n < ncases; n++)          // send out address table
1966     {
1967         bl = list_next(bl);
1968         objmod.reftocodeseg(seg,*poffset,list_block(bl).Boffset);
1969         *poffset += tysize(TYnptr);
1970     }
1971     assert(*poffset == offset + ncases * tysize(TYnptr));
1972 }
1973 
1974 /*****************************
1975  * Return a jump opcode relevant to the elem for a JMP true.
1976  */
1977 
1978 int jmpopcode(elem *e)
1979 {
1980     tym_t tym;
1981     int zero,i,jp,op;
1982     static immutable ubyte[6][2][2] jops =
1983     [   /* <=  >   <   >=  ==  !=    <=0 >0  <0  >=0 ==0 !=0    */
1984        [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JS ,JNS,JE ,JNE] ], /* signed   */
1985        [ [JBE,JA ,JB ,JAE,JE ,JNE],[JE ,JNE,JB ,JAE,JE ,JNE] ], /* uint */
1986 /+
1987        [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JL ,JGE,JE ,JNE] ], /* real     */
1988        [ [JBE,JA ,JB ,JAE,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087     */
1989        [ [JA ,JBE,JAE,JB ,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087 R   */
1990 +/
1991     ];
1992 
1993     enum
1994     {
1995         XP     = (JP  << 8),
1996         XNP    = (JNP << 8),
1997     }
1998     static immutable uint[26][1] jfops =
1999     /*   le     gt lt     ge  eqeq    ne     unord lg  leg  ule ul uge  */
2000     [
2001       [ XNP|JBE,JA,XNP|JB,JAE,XNP|JE, XP|JNE,JP,   JNE,JNP, JBE,JC,XP|JAE,
2002 
2003     /*  ug    ue ngt nge nlt    nle    ord nlg nleg nule nul nuge    nug     nue */
2004         XP|JA,JE,JBE,JB, XP|JAE,XP|JA, JNP,JE, JP,  JA,  JNC,XNP|JB, XNP|JBE,JNE        ], /* 8087     */
2005     ];
2006 
2007     assert(e);
2008     while (e.Eoper == OPcomma ||
2009         /* The OTleaf(e.EV.E1.Eoper) is to line up with the case in cdeq() where  */
2010         /* we decide if mPSW is passed on when evaluating E2 or not.    */
2011          (e.Eoper == OPeq && OTleaf(e.EV.E1.Eoper)))
2012     {
2013         e = e.EV.E2;                      /* right operand determines it  */
2014     }
2015 
2016     op = e.Eoper;
2017     tym_t tymx = tybasic(e.Ety);
2018     bool needsNanCheck = tyfloating(tymx) && config.inline8087 &&
2019         (tymx == TYldouble || tymx == TYildouble || tymx == TYcldouble ||
2020          tymx == TYcdouble || tymx == TYcfloat ||
2021          (tyxmmreg(tymx) && config.fpxmmregs && e.Ecount != e.Ecomsub) ||
2022          op == OPind ||
2023          (OTcall(op) && (regmask(tymx, tybasic(e.EV.E1.Eoper)) & (mST0 | XMMREGS))));
2024     if (e.Ecount != e.Ecomsub)          // comsubs just get Z bit set
2025     {
2026         if (needsNanCheck) // except for floating point values that need a NaN check
2027             return XP|JNE;
2028         else
2029             return JNE;
2030     }
2031     if (!OTrel(op))                       // not relational operator
2032     {
2033         if (needsNanCheck)
2034             return XP|JNE;
2035 
2036         if (op == OPu32_64) { e = e.EV.E1; op = e.Eoper; }
2037         if (op == OPu16_32) { e = e.EV.E1; op = e.Eoper; }
2038         if (op == OPu8_16) op = e.EV.E1.Eoper;
2039         return ((op >= OPbt && op <= OPbts) || op == OPbtst) ? JC : JNE;
2040     }
2041 
2042     if (e.EV.E2.Eoper == OPconst)
2043         zero = !boolres(e.EV.E2);
2044     else
2045         zero = 0;
2046 
2047     tym = e.EV.E1.Ety;
2048     if (tyfloating(tym))
2049     {
2050 static if (1)
2051 {
2052         i = 0;
2053         if (config.inline8087)
2054         {   i = 1;
2055 
2056 static if (1)
2057 {
2058             if (rel_exception(op) || config.flags4 & CFG4fastfloat)
2059             {
2060                 const bool NOSAHF = (I64 || config.fpxmmregs);
2061                 if (zero)
2062                 {
2063                     if (NOSAHF)
2064                         op = swaprel(op);
2065                 }
2066                 else if (NOSAHF)
2067                     op = swaprel(op);
2068                 else if (cmporder87(e.EV.E2))
2069                     op = swaprel(op);
2070                 else
2071                 { }
2072             }
2073             else
2074             {
2075                 if (zero && config.target_cpu < TARGET_80386)
2076                 { }
2077                 else
2078                     op = swaprel(op);
2079             }
2080 }
2081 else
2082 {
2083             if (zero && !rel_exception(op) && config.target_cpu >= TARGET_80386)
2084                 op = swaprel(op);
2085             else if (!zero &&
2086                 (cmporder87(e.EV.E2) || !(rel_exception(op) || config.flags4 & CFG4fastfloat)))
2087                 /* compare is reversed */
2088                 op = swaprel(op);
2089 }
2090         }
2091         jp = jfops[0][op - OPle];
2092         goto L1;
2093 }
2094 else
2095 {
2096         i = (config.inline8087) ? (3 + cmporder87(e.EV.E2)) : 2;
2097 }
2098     }
2099     else if (tyuns(tym) || tyuns(e.EV.E2.Ety))
2100         i = 1;
2101     else if (tyintegral(tym) || typtr(tym))
2102         i = 0;
2103     else
2104     {
2105         debug
2106         elem_print(e);
2107         WRTYxx(tym);
2108         assert(0);
2109     }
2110 
2111     jp = jops[i][zero][op - OPle];        /* table starts with OPle       */
2112 
2113     /* Try to rewrite uint comparisons so they rely on just the Carry flag
2114      */
2115     if (i == 1 && (jp == JA || jp == JBE) &&
2116         (e.EV.E2.Eoper != OPconst && e.EV.E2.Eoper != OPrelconst))
2117     {
2118         jp = (jp == JA) ? JC : JNC;
2119     }
2120 
2121 L1:
2122     debug
2123     if ((jp & 0xF0) != 0x70)
2124     {
2125         WROP(op);
2126         printf("i %d zero %d op x%x jp x%x\n",i,zero,op,jp);
2127     }
2128 
2129     assert((jp & 0xF0) == 0x70);
2130     return jp;
2131 }
2132 
2133 /**********************************
2134  * Append code to cdb which validates pointer described by
2135  * addressing mode in *pcs. Modify addressing mode in *pcs.
2136  * Params:
2137  *    cdb = append generated code to this
2138  *    pcs = original addressing mode to be updated
2139  *    keepmsk = mask of registers we must not destroy or use
2140  *              if (keepmsk & RMstore), this will be only a store operation
2141  *              into the lvalue
2142  */
2143 
2144 void cod3_ptrchk(ref CodeBuilder cdb,code *pcs,regm_t keepmsk)
2145 {
2146     ubyte sib;
2147     reg_t reg;
2148     uint flagsave;
2149 
2150     assert(!I64);
2151     if (!I16 && pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
2152         return;         // not designed to deal with 48 bit far pointers
2153 
2154     ubyte rm = pcs.Irm;
2155     assert(!(rm & 0x40));       // no disp8 or reg addressing modes
2156 
2157     // If the addressing mode is already a register
2158     reg = rm & 7;
2159     if (I16)
2160     {   static immutable ubyte[8] imode = [ BP,BP,BP,BP,SI,DI,BP,BX ];
2161 
2162         reg = imode[reg];               // convert [SI] to SI, etc.
2163     }
2164     regm_t idxregs = mask(reg);
2165     if ((rm & 0x80 && (pcs.IFL1 != FLoffset || pcs.IEV1.Vuns)) ||
2166         !(idxregs & ALLREGS)
2167        )
2168     {
2169         // Load the offset into a register, so we can push the address
2170         regm_t idxregs2 = (I16 ? IDXREGS : ALLREGS) & ~keepmsk; // only these can be index regs
2171         assert(idxregs2);
2172         allocreg(cdb,&idxregs2,&reg,TYoffset);
2173 
2174         const opsave = pcs.Iop;
2175         flagsave = pcs.Iflags;
2176         pcs.Iop = LEA;
2177         pcs.Irm |= modregrm(0,reg,0);
2178         pcs.Iflags &= ~(CFopsize | CFss | CFes | CFcs);        // no prefix bytes needed
2179         cdb.gen(pcs);                 // LEA reg,EA
2180 
2181         pcs.Iflags = flagsave;
2182         pcs.Iop = opsave;
2183     }
2184 
2185     // registers destroyed by the function call
2186     //used = (mBP | ALLREGS | mES) & ~fregsaved;
2187     regm_t used = 0;                           // much less code generated this way
2188 
2189     code *cs2 = null;
2190     regm_t tosave = used & (keepmsk | idxregs);
2191     for (int i = 0; tosave; i++)
2192     {
2193         regm_t mi = mask(i);
2194 
2195         assert(i < REGMAX);
2196         if (mi & tosave)        /* i = register to save                 */
2197         {
2198             int push,pop;
2199 
2200             stackchanged = 1;
2201             if (i == ES)
2202             {   push = 0x06;
2203                 pop = 0x07;
2204             }
2205             else
2206             {   push = 0x50 + i;
2207                 pop = push | 8;
2208             }
2209             cdb.gen1(push);                     // PUSH i
2210             cs2 = cat(gen1(null,pop),cs2);      // POP i
2211             tosave &= ~mi;
2212         }
2213     }
2214 
2215     // For 16 bit models, push a far pointer
2216     if (I16)
2217     {
2218         int segreg;
2219 
2220         switch (pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
2221         {   case CFes:  segreg = 0x06;  break;
2222             case CFss:  segreg = 0x16;  break;
2223             case CFcs:  segreg = 0x0E;  break;
2224             case 0:     segreg = 0x1E;  break;  // DS
2225             default:
2226                 assert(0);
2227         }
2228 
2229         // See if we should default to SS:
2230         // (Happens when BP is part of the addressing mode)
2231         if (segreg == 0x1E && (rm & 0xC0) != 0xC0 &&
2232             rm & 2 && (rm & 7) != 7)
2233         {
2234             segreg = 0x16;
2235             if (config.wflags & WFssneds)
2236                 pcs.Iflags |= CFss;    // because BP won't be there anymore
2237         }
2238         cdb.gen1(segreg);               // PUSH segreg
2239     }
2240 
2241     cdb.gen1(0x50 + reg);               // PUSH reg
2242 
2243     // Rewrite the addressing mode in *pcs so it is just 0[reg]
2244     setaddrmode(pcs, idxregs);
2245     pcs.IFL1 = FLoffset;
2246     pcs.IEV1.Vuns = 0;
2247 
2248     // Call the validation function
2249     {
2250         makeitextern(getRtlsym(RTLSYM_PTRCHK));
2251 
2252         used &= ~(keepmsk | idxregs);           // regs destroyed by this exercise
2253         getregs(cdb,used);
2254                                                 // CALL __ptrchk
2255         cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM_PTRCHK));
2256     }
2257 
2258     cdb.append(cs2);
2259 }
2260 
2261 /***********************************
2262  * Determine if BP can be used as a general purpose register.
2263  * Note parallels between this routine and prolog().
2264  * Returns:
2265  *      0       can't be used, needed for frame
2266  *      mBP     can be used
2267  */
2268 
2269 regm_t cod3_useBP()
2270 {
2271     tym_t tym;
2272     tym_t tyf;
2273 
2274     // Note that DOSX memory model cannot use EBP as a general purpose
2275     // register, as SS != DS.
2276     if (!(config.exe & EX_flat) || config.flags & (CFGalwaysframe | CFGnoebp))
2277         goto Lcant;
2278 
2279     if (anyiasm)
2280         goto Lcant;
2281 
2282     tyf = funcsym_p.ty();
2283     if (tyf & mTYnaked)                 // if no prolog/epilog for function
2284         goto Lcant;
2285 
2286     if (funcsym_p.Sfunc.Fflags3 & Ffakeeh)
2287     {
2288         goto Lcant;                     // need consistent stack frame
2289     }
2290 
2291     tym = tybasic(tyf);
2292     if (tym == TYifunc)
2293         goto Lcant;
2294 
2295     stackoffsets(0);
2296     localsize = Auto.offset + Fast.offset;                // an estimate only
2297 //    if (localsize)
2298     {
2299         if (!(config.flags4 & CFG4speed) ||
2300             config.target_cpu < TARGET_Pentium ||
2301             tyfarfunc(tym) ||
2302             config.flags & CFGstack ||
2303             localsize >= 0x100 ||       // arbitrary value < 0x1000
2304             (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) ||
2305             calledFinally ||
2306             Alloca.size
2307            )
2308             goto Lcant;
2309     }
2310     return mBP;
2311 
2312 Lcant:
2313     return 0;
2314 }
2315 
2316 /*************************************************
2317  * Generate code segment to be used later to restore a cse
2318  */
2319 
2320 bool cse_simple(code *c, elem *e)
2321 {
2322     regm_t regm;
2323     reg_t reg;
2324     int sz = tysize(e.Ety);
2325 
2326     if (!I16 &&                                  // don't bother with 16 bit code
2327         e.Eoper == OPadd &&
2328         sz == REGSIZE &&
2329         e.EV.E2.Eoper == OPconst &&
2330         e.EV.E1.Eoper == OPvar &&
2331         isregvar(e.EV.E1,&regm,&reg) &&
2332         !(e.EV.E1.EV.Vsym.Sflags & SFLspill)
2333        )
2334     {
2335         memset(c,0,(*c).sizeof);
2336 
2337         // Make this an LEA instruction
2338         c.Iop = LEA;
2339         buildEA(c,reg,-1,1,e.EV.E2.EV.Vuns);
2340         if (I64)
2341         {   if (sz == 8)
2342                 c.Irex |= REX_W;
2343         }
2344 
2345         return true;
2346     }
2347     else if (e.Eoper == OPind &&
2348         sz <= REGSIZE &&
2349         e.EV.E1.Eoper == OPvar &&
2350         isregvar(e.EV.E1,&regm,&reg) &&
2351         (I32 || I64 || regm & IDXREGS) &&
2352         !(e.EV.E1.EV.Vsym.Sflags & SFLspill)
2353        )
2354     {
2355         memset(c,0,(*c).sizeof);
2356 
2357         // Make this a MOV instruction
2358         c.Iop = (sz == 1) ? 0x8A : 0x8B;       // MOV reg,EA
2359         buildEA(c,reg,-1,1,0);
2360         if (sz == 2 && I32)
2361             c.Iflags |= CFopsize;
2362         else if (I64)
2363         {   if (sz == 8)
2364                 c.Irex |= REX_W;
2365         }
2366 
2367         return true;
2368     }
2369     return false;
2370 }
2371 
2372 /**************************
2373  * Store `reg` to the common subexpression save area in index `slot`.
2374  * Params:
2375  *      cdb = where to write code to
2376  *      tym = type of value that's in `reg`
2377  *      reg = register to save
2378  *      slot = index into common subexpression save area
2379  */
2380 void gen_storecse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot)
2381 {
2382     // MOV slot[BP],reg
2383     if (isXMMreg(reg) && config.fpxmmregs) // watch out for ES
2384     {
2385         const aligned = tyvector(tym) ? STACKALIGN >= 16 : true;
2386         const op = xmmstore(tym, aligned);
2387         cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot);
2388         return;
2389     }
2390     opcode_t op = STO;              // normal mov
2391     if (reg == ES)
2392     {
2393         reg = 0;            // the real reg number
2394         op = 0x8C;          // segment reg mov
2395     }
2396     cdb.genc1(op,modregxrm(2, reg, BPRM),FLcs,cast(targ_uns)slot);
2397     if (I64)
2398         code_orrex(cdb.last(), REX_W);
2399 }
2400 
/**************************
 * Emit a compare of entry `slot` of the common subexpression save area
 * against 0.
 * Params:
 *      cdb = where to write code to
 *      tym = type of the saved value (not consulted here)
 *      sz = size in bytes of the saved value
 *      slot = index into common subexpression save area
 */
void gen_testcse(ref CodeBuilder cdb, tym_t tym, uint sz, size_t slot)
{
    // CMP slot[BP],0
    const opcode = (sz == 1) ? 0x80 : 0x81;
    cdb.genc(opcode,modregrm(2,7,BPRM),
                FLcs,cast(targ_uns)slot, FLconst,cast(targ_uns) 0);

    switch (sz)
    {
        case 2:
            if (I64 || I32)
                cdb.last().Iflags |= CFopsize;
            break;

        case 8:
            if (I64)
                code_orrex(cdb.last(), REX_W);
            break;

        default:
            break;
    }
}
2411 
/**************************
 * Emit a load of `reg` from entry `slot` of the common subexpression
 * save area.
 * Params:
 *      cdb = where to write code to
 *      tym = type of the value being reloaded
 *      reg = register to load
 *      slot = index into common subexpression save area
 */
void gen_loadcse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot)
{
    // MOV reg,slot[BP]
    const bool viaXmm = isXMMreg(reg) && config.fpxmmregs;
    if (!viaXmm)
    {
        opcode_t opcode = LOD;
        if (reg == ES)
        {
            opcode = 0x8E;          // segment reg mov
            reg = 0;
        }
        cdb.genc1(opcode,modregxrm(2,reg,BPRM),FLcs,cast(targ_uns)slot);
        if (I64)
            code_orrex(cdb.last(), REX_W);
        return;
    }

    // Vector types are treated as aligned only when STACKALIGN >= 16
    const bool aligned = !tyvector(tym) || STACKALIGN >= 16;
    const opcode = xmmload(tym, aligned);
    cdb.genc1(opcode,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot);
}
2432 
2433 /***************************************
2434  * Gen code for OPframeptr
2435  */
2436 
2437 void cdframeptr(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
2438 {
2439     regm_t retregs = *pretregs & allregs;
2440     if  (!retregs)
2441         retregs = allregs;
2442     reg_t reg;
2443     allocreg(cdb,&retregs, &reg, TYint);
2444 
2445     code cs;
2446     cs.Iop = ESCAPE | ESCframeptr;
2447     cs.Iflags = 0;
2448     cs.Irex = 0;
2449     cs.Irm = cast(ubyte)reg;
2450     cdb.gen(&cs);
2451     fixresult(cdb,e,retregs,pretregs);
2452 }
2453 
2454 /***************************************
2455  * Gen code for load of _GLOBAL_OFFSET_TABLE_.
2456  * This value gets cached in the local variable 'localgot'.
2457  */
2458 
2459 void cdgot(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
2460 {
2461     static if (TARGET_OSX)
2462     {
2463         regm_t retregs = *pretregs & allregs;
2464         if  (!retregs)
2465             retregs = allregs;
2466         reg_t reg;
2467         allocreg(cdb,&retregs, &reg, TYnptr);
2468 
2469         cdb.genc(CALL,0,0,0,FLgot,0);     //     CALL L1
2470         cdb.gen1(0x58 + reg);             // L1: POP reg
2471 
2472         fixresult(cdb,e,retregs,pretregs);
2473     }
2474     else static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
2475     {
2476         regm_t retregs = *pretregs & allregs;
2477         if  (!retregs)
2478             retregs = allregs;
2479         reg_t reg;
2480         allocreg(cdb,&retregs, &reg, TYnptr);
2481 
2482         cdb.genc2(CALL,0,0);        //     CALL L1
2483         cdb.gen1(0x58 + reg);       // L1: POP reg
2484 
2485                                     //     ADD reg,_GLOBAL_OFFSET_TABLE_+3
2486         Symbol *gotsym = Obj.getGOTsym();
2487         cdb.gencs(0x81,modregrm(3,0,reg),FLextern,gotsym);
2488         /* Because the 2:3 offset from L1: is hardcoded,
2489          * this sequence of instructions must not
2490          * have any instructions in between,
2491          * so set CFvolatile to prevent the scheduler from rearranging it.
2492          */
2493         code *cgot = cdb.last();
2494         cgot.Iflags = CFoff | CFvolatile;
2495         cgot.IEV2.Voffset = (reg == AX) ? 2 : 3;
2496 
2497         makeitextern(gotsym);
2498         fixresult(cdb,e,retregs,pretregs);
2499     }
2500     else
2501         assert(0);
2502 }
2503 
2504 /**************************************************
2505  * Load contents of localgot into EBX.
2506  */
2507 
2508 void load_localgot(ref CodeBuilder cdb)
2509 {
2510     static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
2511     {
2512         if (config.flags3 & CFG3pic && I32)
2513         {
2514             if (localgot && !(localgot.Sflags & SFLdead))
2515             {
2516                 localgot.Sflags &= ~GTregcand;     // because this hack doesn't work with reg allocator
2517                 elem *e = el_var(localgot);
2518                 regm_t retregs = mBX;
2519                 codelem(cdb,e,&retregs,false);
2520                 el_free(e);
2521             }
2522             else
2523             {
2524                 elem *e = el_long(TYnptr, 0);
2525                 e.Eoper = OPgot;
2526                 regm_t retregs = mBX;
2527                 codelem(cdb,e,&retregs,false);
2528                 el_free(e);
2529             }
2530         }
2531     }
2532 }
2533 
2534 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
2535 {
2536 /*****************************
2537  * Returns:
2538  *      # of bytes stored
2539  */
2540 
2541 
2542 private int obj_namestring(char *p,const(char)* name)
2543 {
2544     size_t len = strlen(name);
2545     if (len > 255)
2546     {
2547         short *ps = cast(short *)p;
2548         p[0] = 0xFF;
2549         p[1] = 0;
2550         ps[1] = cast(short)len;
2551         memcpy(p + 4,name,len);
2552         const int ONS_OHD = 4;           // max # of extra bytes added by obj_namestring()
2553         len += ONS_OHD;
2554     }
2555     else
2556     {
2557         p[0] = cast(char)len;
2558         memcpy(p + 1,name,len);
2559         len++;
2560     }
2561     return cast(int)len;
2562 }
2563 }
2564 
/**************************
 * Emit a two-operand instruction `op` whose mod field is 3 and whose
 * register operands are `dstreg` and `srcreg`.
 * Params:
 *      cdb = where to write code to
 *      op = opcode
 *      dstreg = first register passed to modregxrmx()
 *      srcreg = second register passed to modregxrmx()
 */
void genregs(ref CodeBuilder cdb,opcode_t op,uint dstreg,uint srcreg)
{
    const modrm = modregxrmx(3,dstreg,srcreg);
    cdb.gen2(op,modrm);
}
2569 
/**************************
 * Emit TEST t,t and flag the instruction with CFpsw.
 * Params:
 *      cdb = where to write code to
 *      t = register to test against itself
 */
void gentstreg(ref CodeBuilder cdb, uint t)
{
    const modrm = modregxrmx(3,t,t);
    cdb.gen2(0x85,modrm);               // TEST t,t
    code *test = cdb.last();
    code_orflag(test,CFpsw);
}
2575 
/**************************
 * Emit PUSH reg.
 * Params:
 *      cdb = where to write code to
 *      reg = register to push; bit 3 set selects the REX_B prefix bit
 */
void genpush(ref CodeBuilder cdb, reg_t reg)
{
    const bool needsRexB = (reg & 8) != 0;

    cdb.gen1(0x50 + (reg & 7));         // PUSH with low 3 bits of reg in the opcode
    if (needsRexB)
        code_orrex(cdb.last(), REX_B);
}
2582 
/**************************
 * Emit POP reg.
 * Params:
 *      cdb = where to write code to
 *      reg = register to pop; bit 3 set selects the REX_B prefix bit
 */
void genpop(ref CodeBuilder cdb, reg_t reg)
{
    const bool needsRexB = (reg & 8) != 0;

    cdb.gen1(0x58 + (reg & 7));         // POP with low 3 bits of reg in the opcode
    if (needsRexB)
        code_orrex(cdb.last(), REX_B);
}
2589 
2590 /**************************
2591  * Generate a MOV to,from register instruction.
2592  * Smart enough to dump redundant register moves, and segment
2593  * register moves.
2594  */
2595 
2596 code *genmovreg(uint to,uint from)
2597 {
2598     CodeBuilder cdb; cdb.ctor();
2599     genmovreg(cdb, to, from);
2600     return cdb.finish();
2601 }
2602 
/**************************
 * Append a MOV to,from register instruction to cdb, with no explicit
 * type: TYMAX is passed on as the type.
 * Params:
 *      cdb = where to write code to
 *      to = destination register
 *      from = source register
 */
void genmovreg(ref CodeBuilder cdb,uint to,uint from)
{
    const tym_t unspecified = TYMAX;
    genmovreg(cdb, to, from, unspecified);
}
2607 
/**************************
 * Append a MOV to,from register instruction to cdb.
 * Nothing is generated when `to` and `from` are the same register.
 * Handles moves between general purpose registers, XMM registers and ES;
 * any other combination asserts.
 * Params:
 *      cdb = where to write code to
 *      to = destination register
 *      from = source register
 *      tym = type of the value moved; TYMAX is replaced by TYsize_t
 */
void genmovreg(ref CodeBuilder cdb, uint to, uint from, tym_t tym)
{
    // Map a register to its kind: AX..R15 => AX, XMM0..XMM7 and XMM15 => XMM0,
    // ES => ES, anything else => itself
    static uint kindOf(uint r)
    {
        switch (r)
        {
        case AX:   .. case R15:    return AX;
        case XMM0: .. case XMM7:
        case XMM15:                return XMM0;
        case ES:                   return ES;
        default:                   return r;
        }
    }

    // Key combining destination kind and source kind (order kept)
    static uint kindPair(uint dst, uint src) { return (kindOf(dst) << 8) + kindOf(src); }

    if (to == from)
        return;                                 // redundant move

    if (tym == TYMAX)
        tym = TYsize_t;                         // avoid register slicing

    switch (kindPair(to, from))
    {
        case kindPair(AX, AX):
        {
            genregs(cdb, 0x89, from, to);       // MOV to,from
            const size = tysize(tym);
            if (I64 && size >= 8)
                code_orrex(cdb.last(), REX_W);
            else if (size == 2)
                code_orflag(cdb.last(), CFopsize);
            break;
        }

        case kindPair(XMM0, XMM0):              // MOVD/Q to,from
            genregs(cdb, xmmload(tym), to-XMM0, from-XMM0);
            checkSetVex(cdb.last(), tym);
            break;

        case kindPair(AX, XMM0):                // MOVD/Q to,from
            genregs(cdb, STOD, from-XMM0, to);
            if (I64 && tysize(tym) >= 8)
                code_orrex(cdb.last(), REX_W);
            checkSetVex(cdb.last(), tym);
            break;

        case kindPair(XMM0, AX):                // MOVD/Q to,from
            genregs(cdb, LODD, to-XMM0, from);
            if (I64 && tysize(tym) >= 8)
                code_orrex(cdb.last(),  REX_W);
            checkSetVex(cdb.last(), tym);
            break;

        case kindPair(ES, AX):                  // segment register load
            assert(tysize(tym) <= REGSIZE);
            genregs(cdb, 0x8E, 0, from);
            break;

        case kindPair(AX, ES):                  // segment register store
            assert(tysize(tym) <= REGSIZE);
            genregs(cdb, 0x8C, 0, to);
            break;

        default:
            debug printf("genmovreg(to = %s, from = %s)\n"
                , regm_str(mask(to)), regm_str(mask(from)));
            assert(0);
    }
}
2675 
2676 /***************************************
2677  * Generate immediate multiply instruction for r1=r2*imm.
2678  * Optimize it into LEA's if we can.
2679  */
2680 
2681 void genmulimm(ref CodeBuilder cdb,uint r1,uint r2,targ_int imm)
2682 {
2683     // These optimizations should probably be put into pinholeopt()
2684     switch (imm)
2685     {
2686         case 1:
2687             genmovreg(cdb,r1,r2);
2688             break;
2689 
2690         case 5:
2691         {
2692             code cs;
2693             cs.Iop = LEA;
2694             cs.Iflags = 0;
2695             cs.Irex = 0;
2696             buildEA(&cs,r2,r2,4,0);
2697             cs.orReg(r1);
2698             cdb.gen(&cs);
2699             break;
2700         }
2701 
2702         default:
2703             cdb.genc2(0x69,modregxrmx(3,r1,r2),imm);    // IMUL r1,r2,imm
2704             break;
2705     }
2706 }
2707 
2708 /******************************
2709  * Load CX with the value of _AHSHIFT.
2710  */
2711 
2712 void genshift(ref CodeBuilder cdb)
2713 {
2714     version (SCPP)
2715     {
2716         // Set up ahshift to trick ourselves into giving the right fixup,
2717         // which must be seg-relative, external frame, external target.
2718         cdb.gencs(0xC7,modregrm(3,0,CX),FLfunc,getRtlsym(RTLSYM_AHSHIFT));
2719         cdb.last().Iflags |= CFoff;
2720     }
2721     else
2722         assert(0);
2723 }
2724 
2725 /******************************
2726  * Move constant value into reg.
2727  * Take advantage of existing values in registers.
2728  * If flags & mPSW
2729  *      set flags based on result
2730  * Else if flags & 8
2731  *      do not disturb flags
2732  * Else
2733  *      don't care about flags
2734  * If flags & 1 then byte move
2735  * If flags & 2 then short move (for I32 and I64)
2736  * If flags & 4 then don't disturb unused portion of register
2737  * If flags & 16 then reg is a byte register AL..BH
2738  * If flags & 64 (0x40) then 64 bit move (I64 only)
2739  * Returns:
2740  *      code (if any) generated
2741  */
2742 
2743 void movregconst(ref CodeBuilder cdb,reg_t reg,targ_size_t value,regm_t flags)
2744 {
2745     reg_t r;
2746     regm_t mreg;
2747 
2748     //printf("movregconst(reg=%s, value= %lld (%llx), flags=%x)\n", regm_str(mask(reg)), value, value, flags);
2749 
2750     regm_t regm = regcon.immed.mval & mask(reg);
2751     targ_size_t regv = regcon.immed.value[reg];
2752 
2753     if (flags & 1)      // 8 bits
2754     {
2755         value &= 0xFF;
2756         regm &= BYTEREGS;
2757 
2758         // If we already have the right value in the right register
2759         if (regm && (regv & 0xFF) == value)
2760             goto L2;
2761 
2762         if (flags & 16 && reg & 4 &&    // if an H byte register
2763             regcon.immed.mval & mask(reg & 3) &&
2764             (((regv = regcon.immed.value[reg & 3]) >> 8) & 0xFF) == value)
2765             goto L2;
2766 
2767         /* Avoid byte register loads on Pentium Pro and Pentium II
2768          * to avoid dependency stalls.
2769          */
2770         if (config.flags4 & CFG4speed &&
2771             config.target_cpu >= TARGET_PentiumPro && !(flags & 4))
2772             goto L3;
2773 
2774         // See if another register has the right value
2775         r = 0;
2776         for (mreg = (regcon.immed.mval & BYTEREGS); mreg; mreg >>= 1)
2777         {
2778             if (mreg & 1)
2779             {
2780                 if ((regcon.immed.value[r] & 0xFF) == value)
2781                 {
2782                     genregs(cdb,0x8A,reg,r);          // MOV regL,rL
2783                     if (I64 && reg >= 4 || r >= 4)
2784                         code_orrex(cdb.last(), REX);
2785                     goto L2;
2786                 }
2787                 if (!(I64 && reg >= 4) &&
2788                     r < 4 && ((regcon.immed.value[r] >> 8) & 0xFF) == value)
2789                 {
2790                     genregs(cdb,0x8A,reg,r | 4);      // MOV regL,rH
2791                     goto L2;
2792                 }
2793             }
2794             r++;
2795         }
2796 
2797         if (value == 0 && !(flags & 8))
2798         {
2799             if (!(flags & 4) &&                 // if we can set the whole register
2800                 !(flags & 16 && reg & 4))       // and reg is not an H register
2801             {
2802                 genregs(cdb,0x31,reg,reg);      // XOR reg,reg
2803                 regimmed_set(reg,value);
2804                 regv = 0;
2805             }
2806             else
2807                 genregs(cdb,0x30,reg,reg);      // XOR regL,regL
2808             flags &= ~mPSW;                     // flags already set by XOR
2809         }
2810         else
2811         {
2812             cdb.genc2(0xC6,modregrmx(3,0,reg),value);  // MOV regL,value
2813             if (reg >= 4 && I64)
2814             {
2815                 code_orrex(cdb.last(), REX);
2816             }
2817         }
2818     L2:
2819         if (flags & mPSW)
2820             genregs(cdb,0x84,reg,reg);            // TEST regL,regL
2821 
2822         if (regm)
2823             // Set just the 'L' part of the register value
2824             regimmed_set(reg,(regv & ~cast(targ_size_t)0xFF) | value);
2825         else if (flags & 16 && reg & 4 && regcon.immed.mval & mask(reg & 3))
2826             // Set just the 'H' part of the register value
2827             regimmed_set((reg & 3),(regv & ~cast(targ_size_t)0xFF00) | (value << 8));
2828         return;
2829     }
2830 L3:
2831     if (I16)
2832         value = cast(targ_short) value;             // sign-extend MSW
2833     else if (I32)
2834         value = cast(targ_int) value;
2835 
2836     if (!I16 && flags & 2)                      // load 16 bit value
2837     {
2838         value &= 0xFFFF;
2839         if (value == 0)
2840             goto L1;
2841         else
2842         {
2843             if (flags & mPSW)
2844                 goto L1;
2845             cdb.genc2(0xC7,modregrmx(3,0,reg),value); // MOV reg,value
2846             cdb.last().Iflags |= CFopsize;           // yes, even for I64
2847             if (regm)
2848                 // High bits of register are not affected by 16 bit load
2849                 regimmed_set(reg,(regv & ~cast(targ_size_t)0xFFFF) | value);
2850         }
2851         return;
2852     }
2853 L1:
2854 
2855     // If we already have the right value in the right register
2856     if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64))
2857     {
2858         if (flags & mPSW)
2859             gentstreg(cdb,reg);
2860     }
2861     else if (flags & 64 && regm && regv == value)
2862     {   // Look at the full 64 bits
2863         if (flags & mPSW)
2864         {
2865             gentstreg(cdb,reg);
2866             code_orrex(cdb.last(), REX_W);
2867         }
2868     }
2869     else
2870     {
2871         if (flags & mPSW)
2872         {
2873             switch (value)
2874             {
2875                 case 0:
2876                     genregs(cdb,0x31,reg,reg);
2877                     break;
2878 
2879                 case 1:
2880                     if (I64)
2881                         goto L4;
2882                     genregs(cdb,0x31,reg,reg);
2883                     goto inc;
2884 
2885                 case ~cast(targ_size_t)0:
2886                     if (I64)
2887                         goto L4;
2888                     genregs(cdb,0x31,reg,reg);
2889                     goto dec;
2890 
2891                 default:
2892                 L4:
2893                     if (flags & 64)
2894                     {
2895                         cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64
2896                         gentstreg(cdb,reg);
2897                         code_orrex(cdb.last(), REX_W);
2898                     }
2899                     else
2900                     {
2901                         value &= 0xFFFFFFFF;
2902                         cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value
2903                         gentstreg(cdb,reg);
2904                     }
2905                     break;
2906             }
2907         }
2908         else
2909         {
2910             // Look for single byte conversion
2911             if (regcon.immed.mval & mAX)
2912             {
2913                 if (I32)
2914                 {
2915                     if (reg == AX && value == cast(targ_short) regv)
2916                     {
2917                         cdb.gen1(0x98);               // CWDE
2918                         goto done;
2919                     }
2920                     if (reg == DX &&
2921                         value == (regcon.immed.value[AX] & 0x80000000 ? 0xFFFFFFFF : 0) &&
2922                         !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
2923                        )
2924                     {
2925                         cdb.gen1(0x99);               // CDQ
2926                         goto done;
2927                     }
2928                 }
2929                 else if (I16)
2930                 {
2931                     if (reg == AX &&
2932                         cast(targ_short) value == cast(byte) regv)
2933                     {
2934                         cdb.gen1(0x98);               // CBW
2935                         goto done;
2936                     }
2937 
2938                     if (reg == DX &&
2939                         cast(targ_short) value == (regcon.immed.value[AX] & 0x8000 ? cast(targ_short) 0xFFFF : cast(targ_short) 0) &&
2940                         !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
2941                        )
2942                     {
2943                         cdb.gen1(0x99);               // CWD
2944                         goto done;
2945                     }
2946                 }
2947             }
2948             if (value == 0 && !(flags & 8) && config.target_cpu >= TARGET_80486)
2949             {
2950                 genregs(cdb,0x31,reg,reg);              // XOR reg,reg
2951                 goto done;
2952             }
2953 
2954             if (!I64 && regm && !(flags & 8))
2955             {
2956                 if (regv + 1 == value ||
2957                     // Catch case of (0xFFFF+1 == 0) for 16 bit compiles
2958                     (I16 && cast(targ_short)(regv + 1) == cast(targ_short)value))
2959                 {
2960                 inc:
2961                     cdb.gen1(0x40 + reg);     // INC reg
2962                     goto done;
2963                 }
2964                 if (regv - 1 == value)
2965                 {
2966                 dec:
2967                     cdb.gen1(0x48 + reg);     // DEC reg
2968                     goto done;
2969                 }
2970             }
2971 
2972             // See if another register has the right value
2973             r = 0;
2974             for (mreg = regcon.immed.mval; mreg; mreg >>= 1)
2975             {
2976                 debug
2977                 assert(!I16 || regcon.immed.value[r] == cast(targ_short)regcon.immed.value[r]);
2978 
2979                 if (mreg & 1 && regcon.immed.value[r] == value)
2980                 {
2981                     genmovreg(cdb,reg,r);
2982                     goto done;
2983                 }
2984                 r++;
2985             }
2986 
2987             if (value == 0 && !(flags & 8))
2988             {
2989                 genregs(cdb,0x31,reg,reg);              // XOR reg,reg
2990             }
2991             else
2992             {   // See if we can just load a byte
2993                 if (regm & BYTEREGS &&
2994                     !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_PentiumPro)
2995                    )
2996                 {
2997                     if ((regv & ~cast(targ_size_t)0xFF) == (value & ~cast(targ_size_t)0xFF))
2998                     {
2999                         movregconst(cdb,reg,value,(flags & 8) |4|1);  // load regL
3000                         return;
3001                     }
3002                     if (regm & (mAX|mBX|mCX|mDX) &&
3003                         (regv & ~cast(targ_size_t)0xFF00) == (value & ~cast(targ_size_t)0xFF00) &&
3004                         !I64)
3005                     {
3006                         movregconst(cdb,4|reg,value >> 8,(flags & 8) |4|1|16); // load regH
3007                         return;
3008                     }
3009                 }
3010                 if (flags & 64)
3011                     cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64
3012                 else
3013                 {
3014                     value &= 0xFFFFFFFF;
3015                     cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value
3016                 }
3017             }
3018         }
3019     done:
3020         regimmed_set(reg,value);
3021     }
3022 }
3023 
3024 /**************************
3025  * Generate a jump instruction.
3026  */
3027 
3028 void genjmp(ref CodeBuilder cdb,opcode_t op,uint fltarg,block *targ)
3029 {
3030     code cs;
3031     cs.Iop = op & 0xFF;
3032     cs.Iflags = 0;
3033     cs.Irex = 0;
3034     if (op != JMP && op != 0xE8)        // if not already long branch
3035           cs.Iflags = CFjmp16;          // assume long branch for op = 0x7x
3036     cs.IFL2 = cast(ubyte)fltarg;        // FLblock (or FLcode)
3037     cs.IEV2.Vblock = targ;              // target block (or code)
3038     if (fltarg == FLcode)
3039         (cast(code *)targ).Iflags |= CFtarg;
3040 
3041     if (config.flags4 & CFG4fastfloat)  // if fast floating point
3042     {
3043         cdb.gen(&cs);
3044         return;
3045     }
3046 
3047     switch (op & 0xFF00)                // look at second jump opcode
3048     {
3049         // The JP and JNP come from floating point comparisons
3050         case JP << 8:
3051             cdb.gen(&cs);
3052             cs.Iop = JP;
3053             cdb.gen(&cs);
3054             break;
3055 
3056         case JNP << 8:
3057         {
3058             // Do a JP around the jump instruction
3059             code *cnop = gennop(null);
3060             genjmp(cdb,JP,FLcode,cast(block *) cnop);
3061             cdb.gen(&cs);
3062             cdb.append(cnop);
3063             break;
3064         }
3065 
3066         case 1 << 8:                    // toggled no jump
3067         case 0 << 8:
3068             cdb.gen(&cs);
3069             break;
3070 
3071         default:
3072             debug
3073             printf("jop = x%x\n",op);
3074             assert(0);
3075     }
3076 }
3077 
3078 /*********************************************
3079  * Generate first part of prolog for interrupt function.
3080  */
3081 void prolog_ifunc(ref CodeBuilder cdb, tym_t* tyf)
3082 {
3083     static immutable ubyte[4] ops2 = [ 0x60,0x1E,0x06,0 ];
3084     static immutable ubyte[11] ops0 = [ 0x50,0x51,0x52,0x53,
3085                                     0x54,0x55,0x56,0x57,
3086                                     0x1E,0x06,0 ];
3087 
3088     immutable(ubyte)* p = (config.target_cpu >= TARGET_80286) ? ops2.ptr : ops0.ptr;
3089     do
3090         cdb.gen1(*p);
3091     while (*++p);
3092 
3093     genregs(cdb,0x8B,BP,SP);     // MOV BP,SP
3094     if (localsize)
3095         cod3_stackadj(cdb, cast(int)localsize);
3096 
3097     *tyf |= mTYloadds;
3098 }
3099 
/*********************************************
 * Generate the part of the prolog that reloads DS with DGROUP
 * and, for interrupt functions, clears the direction flag.
 * Params:
 *      cdb = code builder the instructions are appended to
 *      tyf = function type flags; DS is reloaded when mTYloadds is set
 *      tym = function type; CLD is emitted when it is TYifunc
 *      pushds = true if DS has already been pushed
 */
void prolog_ifunc2(ref CodeBuilder cdb, tym_t tyf, tym_t tym, bool pushds)
{
    /* Determine if we need to reload DS        */
    const reloadDS = (tyf & mTYloadds) != 0;
    if (reloadDS)
    {
        if (!pushds)                            // if not already pushed
            cdb.gen1(0x1E);                     // PUSH DS
        spoff += _tysize[TYint];

        // MOV  AX,DGROUP
        cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0);
        code *movax = cdb.last();
        movax.IEV2.Vseg = DATA;
        movax.Iflags ^= CFseg | CFoff;          // turn off CFoff, on CFseg

        cdb.gen2(0x8E,modregrm(3,3,AX));        // MOV  DS,AX
        useregs(mAX);
    }

    const isInterrupt = (tym == TYifunc);
    if (isInterrupt)
        cdb.gen1(0xFC);                         // CLD
}
3119 
/*********************************************
 * Generate the prolog for a far function under the 16 bit Windows
 * conventions selected by config.wflags: load AX with the data segment
 * selector, set up BP and optionally save and reload DS.
 * Params:
 *      cdb = code builder the instructions are appended to
 *      tyf = function type flags; mTYloadds is cleared in the WFdgroup
 *            case when WFreduced is set
 *      pushds = set to true if a PUSH DS was generated
 */
void prolog_16bit_windows_farfunc(ref CodeBuilder cdb, tym_t* tyf, bool* pushds)
{
    int wflags = config.wflags;
    const exported = (*tyf & mTYexport) != 0;
    if (wflags & WFreduced && !exported)
    {   // reduced prolog/epilog for non-exported functions
        wflags &= ~(WFdgroup | WFds | WFss);
    }

    getregsNoSave(mAX);                     // should not have any value in AX

    // Which source (if any) AX gets the data segment from
    const segsel = wflags & (WFdgroup | WFds | WFss);
    switch (segsel)
    {
        case 0:
            break;

        case WFdgroup:                      // MOV  AX,DGROUP
        {
            if (wflags & WFreduced)
                *tyf &= ~mTYloadds;          // remove redundancy
            cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0);
            code *movax = cdb.last();
            movax.IEV2.Vseg = DATA;
            movax.Iflags ^= CFseg | CFoff;  // turn off CFoff, on CFseg
            break;
        }

        case WFss:
        case WFds:
        {
            // segment register number: 2 is SS, 3 is DS
            const int segreg = (segsel == WFss) ? 2 : 3;
            cdb.gen2(0x8C,modregrm(3,segreg,AX)); // MOV AX,segreg
            if (wflags & WFds)
                cdb.gen1(0x90);             // NOP
            break;
        }

        default:
            debug
            printf("config.wflags = x%x\n",config.wflags);
            assert(0);
    }

    if (wflags & WFincbp)
        cdb.gen1(0x40 + BP);                // INC  BP
    cdb.gen1(0x50 + BP);                    // PUSH BP
    genregs(cdb,0x8B,BP,SP);                // MOV  BP,SP

    const saveDS = (wflags & (WFsaveds | WFds | WFss | WFdgroup)) != 0;
    if (saveDS)
    {
        cdb.gen1(0x1E);                     // PUSH DS
        *pushds = true;
        BPoff = -REGSIZE;
    }

    const loadDS = (wflags & (WFds | WFss | WFdgroup)) != 0;
    if (loadDS)
        cdb.gen2(0x8E,modregrm(3,3,AX));    // MOV  DS,AX
}
3177 
3178 /**********************************************
3179  * Set up frame register.
3180  * Input:
3181  *      *xlocalsize     amount of local variables
3182  * Output:
3183  *      *enter          set to true if ENTER instruction can be used, false otherwise
3184  *      *xlocalsize     amount to be subtracted from stack pointer
3185  *      *cfa_offset     the frame pointer's offset from the CFA
3186  * Returns:
3187  *      generated code
3188  */
3189 
3190 void prolog_frame(ref CodeBuilder cdb, uint farfunc, uint* xlocalsize, bool* enter, int* cfa_offset)
3191 {
3192     //printf("prolog_frame\n");
3193     *cfa_offset = 0;
3194 
3195     if (0 && config.exe == EX_WIN64)
3196     {
3197         // PUSH RBP
3198         // LEA RBP,0[RSP]
3199         cdb. gen1(0x50 + BP);
3200         cdb.genc1(LEA,(REX_W<<16) | (modregrm(0,4,SP)<<8) | modregrm(2,BP,4),FLconst,0);
3201         *enter = false;
3202         return;
3203     }
3204 
3205     if (config.wflags & WFincbp && farfunc)
3206         cdb.gen1(0x40 + BP);      // INC  BP
3207     if (config.target_cpu < TARGET_80286 ||
3208         config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_OSX64 | EX_FREEBSD | EX_FREEBSD64 | EX_DRAGONFLYBSD64 | EX_SOLARIS | EX_SOLARIS64 | EX_WIN64) ||
3209         !localsize ||
3210         config.flags & CFGstack ||
3211         (*xlocalsize >= 0x1000 && config.exe & EX_flat) ||
3212         localsize >= 0x10000 ||
3213         (NTEXCEPTIONS == 2 &&
3214          (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))) ||
3215         (config.target_cpu >= TARGET_80386 &&
3216          config.flags4 & CFG4speed)
3217        )
3218     {
3219         cdb.gen1(0x50 + BP);      // PUSH BP
3220         genregs(cdb,0x8B,BP,SP);      // MOV  BP,SP
3221         if (I64)
3222             code_orrex(cdb.last(), REX_W);   // MOV RBP,RSP
3223         if ((config.objfmt & (OBJ_ELF | OBJ_MACH)) && config.fulltypes)
3224             // Don't reorder instructions, as dwarf CFA relies on it
3225             code_orflag(cdb.last(), CFvolatile);
3226 static if (NTEXCEPTIONS == 2)
3227 {
3228         if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))
3229         {
3230             nteh_prolog(cdb);
3231             int sz = nteh_contextsym_size();
3232             assert(sz != 0);        // should be 5*4, not 0
3233             *xlocalsize -= sz;      // sz is already subtracted from ESP
3234                                     // by nteh_prolog()
3235         }
3236 }
3237         if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
3238             config.ehmethod == EHmethod.EH_DWARF)
3239         {
3240             int off = 2 * REGSIZE;      // 1 for the return address + 1 for the PUSH EBP
3241             dwarf_CFA_set_loc(1);           // address after PUSH EBP
3242             dwarf_CFA_set_reg_offset(SP, off); // CFA is now 8[ESP]
3243             dwarf_CFA_offset(BP, -off);       // EBP is at 0[ESP]
3244             dwarf_CFA_set_loc(I64 ? 4 : 3);   // address after MOV EBP,ESP
3245             /* Oddly, the CFA is not the same as the frame pointer,
3246              * which is why the offset of BP is set to 8
3247              */
3248             dwarf_CFA_set_reg_offset(BP, off);        // CFA is now 0[EBP]
3249             *cfa_offset = off;  // remember the difference between the CFA and the frame pointer
3250         }
3251         *enter = false;              /* do not use ENTER instruction */
3252     }
3253     else
3254         *enter = true;
3255 }
3256 
3257 /**********************************************
3258  * Enforce stack alignment.
3259  * Input:
3260  *      cdb     code builder.
3261  * Returns:
3262  *      generated code
3263  */
3264 void prolog_stackalign(ref CodeBuilder cdb)
3265 {
3266     if (!enforcealign)
3267         return;
3268 
3269     const offset = (hasframe ? 2 : 1) * REGSIZE;   // 1 for the return address + 1 for the PUSH EBP
3270     if (offset & (STACKALIGN - 1) || TARGET_STACKALIGN < STACKALIGN)
3271         cod3_stackalign(cdb, STACKALIGN);
3272 }
3273 
/**********************************************
 * Allocate the local variables on the stack.
 *
 * Depending on configuration this generates a stack-checking sequence,
 * an ENTER instruction, a single PUSH, or a plain stack adjustment.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      tyf = function type; TYmfunc selects CX instead of AX as the
 *            register used for the single-PUSH allocation
 *      xlocalsize = number of bytes to allocate
 *      enter = true if an ENTER instruction is to be used
 *      pushalloc = set to true when the allocation was done with a PUSH;
 *                  left untouched otherwise
 */
void prolog_frameadj(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool enter, bool* pushalloc)
{
    uint pushallocreg = (tyf == TYmfunc) ? CX : AX;
static if (TARGET_LINUX)
{
    bool check = false;               // seems that Linux doesn't need to fault in stack pages
}
else
{
    // Probe the stack when stack checking is configured (except for small
    // 32 bit frames), or for frames of a page or more on flat Windows/DOS targets
    bool check = (config.flags & CFGstack && !(I32 && xlocalsize < 0x1000)) // if stack overflow check
        || (TARGET_WINDOS && xlocalsize >= 0x1000 && config.exe & EX_flat);
}
    if (check)
    {
        if (I16)
        {
            // 16 bit code: pass the size in AX to the runtime's _chkstk
            // BUG: Won't work if parameter is passed in AX
            movregconst(cdb,AX,xlocalsize,false); // MOV AX,localsize
            makeitextern(getRtlsym(RTLSYM_CHKSTK));
                                                    // CALL _chkstk
            cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM_CHKSTK));
            useregs((ALLREGS | mBP | mES) & ~getRtlsym(RTLSYM_CHKSTK).Sregsaved);
        }
        else
        {
            /* Watch out for 64 bit code where EDX is passed as a register parameter
             */
            reg_t reg = I64 ? R11 : DX;  // scratch register

            /*      MOV     EDX, xlocalsize/0x1000
             *  L1: SUB     ESP, 0x1000
             *      TEST    [ESP],ESP
             *      DEC     EDX
             *      JNE     L1
             *      SUB     ESP, xlocalsize % 0x1000
             */
            // NOTE(review): when xlocalsize < 0x1000 the loop counter starts
            // at 0 and the DEC/JNE pair would not terminate after one pass -
            // confirm callers never reach here with less than one page.
            movregconst(cdb, reg, xlocalsize / 0x1000, false);
            cod3_stackadj(cdb, 0x1000);
            code_orflag(cdb.last(), CFtarg2);
            cdb.gen2sib(0x85, modregrm(0,SP,4),modregrm(0,4,SP));
            // The JNE displacements below are hard-coded; presumably they
            // reach back to the SUB at L1, so they depend on the exact
            // sizes of the instructions generated above.
            if (I64)
            {   cdb.gen2(0xFF, modregrmx(3,1,R11));   // DEC R11D
                cdb.genc2(JNE,0,cast(targ_uns)-15);
            }
            else
            {   cdb.gen1(0x48 + DX);                  // DEC EDX
                cdb.genc2(JNE,0,cast(targ_uns)-12);
            }
            regimmed_set(reg,0);             // reg is now 0
            cod3_stackadj(cdb, xlocalsize & 0xFFF);
            useregs(mask(reg));
        }
    }
    else
    {
        if (enter)
        {   // ENTER xlocalsize,0
            cdb.genc(0xC8,0,FLconst,xlocalsize,FLconst,cast(targ_uns) 0);
            assert(!(config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D)); // didn't emit Dwarf data
        }
        else if (xlocalsize == REGSIZE && config.flags4 & CFG4optimized)
        {
            // Allocating exactly one register-sized slot: a PUSH does it
            cdb. gen1(0x50 + pushallocreg);    // PUSH AX
            // Do this to prevent an -x[EBP] to be moved in
            // front of the push.
            code_orflag(cdb.last(),CFvolatile);
            *pushalloc = true;
        }
        else
            cod3_stackadj(cdb, xlocalsize);
    }
}
3346 
/**********************************************
 * Allocate the local variables on the stack, using one or two PUSH
 * instructions when the size is exactly one or two registers and a
 * plain stack adjustment otherwise.
 * Params:
 *      cdb = code builder the generated instructions are appended to
 *      tyf = function type; TYmfunc selects CX instead of AX as the
 *            register that gets pushed
 *      xlocalsize = number of bytes to allocate
 *      pushalloc = set to true when PUSH was used; left untouched otherwise
 */
void prolog_frameadj2(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool* pushalloc)
{
    const uint pushedReg = (tyf == TYmfunc) ? CX : AX;
    const bool oneSlot = (xlocalsize == REGSIZE);
    const bool twoSlots = (xlocalsize == 2 * REGSIZE);

    if (!oneSlot && !twoSlots)
    {
        cod3_stackadj(cdb, xlocalsize);
        return;
    }

    cdb.gen1(0x50 + pushedReg);             // PUSH AX
    if (!oneSlot)
        cdb.gen1(0x50 + pushedReg);         // PUSH AX
    *pushalloc = true;
}
3364 
/**********************************************
 * Store the magic parameter for alloca() into its slot in the
 * stack frame.
 * Params:
 *      cdb = code builder the generated instruction is appended to
 */
void prolog_setupalloca(ref CodeBuilder cdb)
{
    //printf("prolog_setupalloca() offset x%x size x%x alignment x%x\n",
        //cast(int)Alloca.offset, cast(int)Alloca.size, cast(int)Alloca.alignment);

    // Set up magic parameter for alloca()
    const slotOffset = Alloca.offset + BPoff;   // displacement of the slot from BP
    const magicValue = localsize - BPoff;       // constant stored into the slot

    // MOV -REGSIZE[BP],localsize - BPoff
    cdb.genc(0xC7,modregrm(2,0,BPRM),
            FLconst,slotOffset,
            FLconst,magicValue);
    if (I64)
        code_orrex(cdb.last(), REX_W);
}
3377 
3378 /**************************************
3379  * Save registers that the function destroys,
3380  * but that the ABI says should be preserved across
3381  * function calls.
3382  *
3383  * Emit Dwarf info for these saves.
3384  * Params:
3385  *      cdb = append generated instructions to this
3386  *      topush = mask of registers to push
3387  *      cfa_offset = offset of frame pointer from CFA
3388  */
3389 
3390 void prolog_saveregs(ref CodeBuilder cdb, regm_t topush, int cfa_offset)
3391 {
3392     if (pushoffuse)
3393     {
3394         // Save to preallocated section in the stack frame
3395         int xmmtopush = numbitsset(topush & XMMREGS);   // XMM regs take 16 bytes
3396         int gptopush = numbitsset(topush) - xmmtopush;  // general purpose registers to save
3397         targ_size_t xmmoffset = pushoff + BPoff;
3398         if (!hasframe || enforcealign)
3399             xmmoffset += EBPtoESP;
3400         targ_size_t gpoffset = xmmoffset + xmmtopush * 16;
3401         while (topush)
3402         {
3403             reg_t reg = findreg(topush);
3404             topush &= ~mask(reg);
3405             if (isXMMreg(reg))
3406             {
3407                 if (hasframe && !enforcealign)
3408                 {
3409                     // MOVUPD xmmoffset[EBP],xmm
3410                     cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset);
3411                 }
3412                 else
3413                 {
3414                     // MOVUPD xmmoffset[ESP],xmm
3415                     cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset);
3416                 }
3417                 xmmoffset += 16;
3418             }
3419             else
3420             {
3421                 if (hasframe && !enforcealign)
3422                 {
3423                     // MOV gpoffset[EBP],reg
3424                     cdb.genc1(0x89,modregxrm(2,reg,BPRM),FLconst,gpoffset);
3425                 }
3426                 else
3427                 {
3428                     // MOV gpoffset[ESP],reg
3429                     cdb.genc1(0x89,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset);
3430                 }
3431                 if (I64)
3432                     code_orrex(cdb.last(), REX_W);
3433                 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
3434                     config.ehmethod == EHmethod.EH_DWARF)
3435                 {   // Emit debug_frame data giving location of saved register
3436                     code *c = cdb.finish();
3437                     pinholeopt(c, null);
3438                     dwarf_CFA_set_loc(calcblksize(c));  // address after save
3439                     dwarf_CFA_offset(reg, cast(int)(gpoffset - cfa_offset));
3440                     cdb.reset();
3441                     cdb.append(c);
3442                 }
3443                 gpoffset += REGSIZE;
3444             }
3445         }
3446     }
3447     else
3448     {
3449         while (topush)                      /* while registers to push      */
3450         {
3451             reg_t reg = findreg(topush);
3452             topush &= ~mask(reg);
3453             if (isXMMreg(reg))
3454             {
3455                 // SUB RSP,16
3456                 cod3_stackadj(cdb, 16);
3457                 // MOVUPD 0[RSP],xmm
3458                 cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
3459                 EBPtoESP += 16;
3460                 spoff += 16;
3461             }
3462             else
3463             {
3464                 genpush(cdb, reg);
3465                 EBPtoESP += REGSIZE;
3466                 spoff += REGSIZE;
3467                 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
3468                     config.ehmethod == EHmethod.EH_DWARF)
3469                 {   // Emit debug_frame data giving location of saved register
3470                     // relative to 0[EBP]
3471                     code *c = cdb.finish();
3472                     pinholeopt(c, null);
3473                     dwarf_CFA_set_loc(calcblksize(c));  // address after PUSH reg
3474                     dwarf_CFA_offset(reg, -EBPtoESP - cfa_offset);
3475                     cdb.reset();
3476                     cdb.append(c);
3477                 }
3478             }
3479         }
3480     }
3481 }
3482 
/**************************************
 * Undo prolog_saveregs(): emit the code that reloads the saved registers.
 * Params:
 *      cdb = sink for generated code
 *      topop = mask of registers to restore; only XMM registers and the
 *              low 16 register bits may be set
 */

private void epilog_restoreregs(ref CodeBuilder cdb, regm_t topop)
{
    debug
    if (topop & ~(XMMREGS | 0xFFFF))
        printf("fregsaved = %s, mfuncreg = %s\n",regm_str(fregsaved),regm_str(mfuncreg));

    assert(!(topop & ~(XMMREGS | 0xFFFF)));

    if (!pushoffuse)
    {
        /* Registers live on the stack top: scan downwards from the highest
         * candidate register, restoring each one that is present in topop.
         */
        reg_t reg = R15;
        if (topop & XMMREGS)
            reg = I64 ? XMM7 : DI;

        for (regm_t regm = 1 << reg; topop; regm >>= 1, reg--)
        {
            if (!(topop & regm))
                continue;                   // this register was not saved

            if (isXMMreg(reg))
            {
                // MOVUPD xmm,0[RSP]
                cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
                // ADD RSP,16
                cod3_stackadj(cdb, -16);
            }
            else
            {
                cdb.gen1(0x58 + (reg & 7));         // POP reg
                if (reg & 8)
                    code_orrex(cdb.last(), REX_B);
            }
            topop &= ~regm;
        }
        return;
    }

    /* Registers were saved to a preallocated section in the stack frame:
     * XMM slots (16 bytes each) come first, general purpose slots follow.
     */
    const bool bpRelative = hasframe && !enforcealign;  // address via EBP, else via ESP

    int xmmtopop = numbitsset(topop & XMMREGS);   // XMM regs take 16 bytes
    int gptopop = numbitsset(topop) - xmmtopop;   // general purpose registers to restore

    targ_size_t xmmoffset = pushoff + BPoff;
    if (!bpRelative)
        xmmoffset += EBPtoESP;
    targ_size_t gpoffset = xmmoffset + xmmtopop * 16;

    while (topop)
    {
        reg_t reg = findreg(topop);
        topop &= ~mask(reg);

        if (isXMMreg(reg))
        {
            // MOVUPD xmm,xmmoffset[EBP]   or   MOVUPD xmm,xmmoffset[ESP]
            const uint ea = bpRelative
                ? modregxrm(2,reg-XMM0,BPRM)
                : modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP);
            cdb.genc1(LODUPD,ea,FLconst,xmmoffset);
            xmmoffset += 16;
            continue;
        }

        // MOV reg,gpoffset[EBP]   or   MOV reg,gpoffset[ESP]
        const uint ea = bpRelative
            ? modregxrm(2,reg,BPRM)
            : modregxrm(2,reg,4) + 256*modregrm(0,4,SP);
        cdb.genc1(0x8B,ea,FLconst,gpoffset);
        if (I64)
            code_orrex(cdb.last(), REX_W);
        gpoffset += REGSIZE;
    }
}
3569 
version (SCPP)
{
/******************************
 * Generate the call to the trace (profiler) prolog routine, followed by the
 * name of the current function embedded inline in the code stream.
 * Params:
 *      cdb = sink for generated code
 *      farfunc = true selects RTLSYM_TRACE_PRO_F, false RTLSYM_TRACE_PRO_N
 *      regsaved = set to the Sregsaved mask of the runtime routine called
 */
void prolog_trace(ref CodeBuilder cdb, bool farfunc, uint* regsaved)
{
    Symbol *s = getRtlsym(farfunc ? RTLSYM_TRACE_PRO_F : RTLSYM_TRACE_PRO_N);
    makeitextern(s);
    cdb.gencs(I16 ? 0x9A : CALL,0,FLfunc,s);      // CALL _trace
    if (!I16)
        code_orflag(cdb.last(),CFoff | CFselfrel);
    /* Embedding the function name inline after the call works, but it
     * makes disassembling the code annoying.
     */
    static if (ELFOBJ || MACHOBJ)
    {
        // Generate length prefixed name that is recognized by profiler
        // Sident is an array, so take .ptr explicitly for the C string functions
        const(char)* ident = funcsym_p.Sident.ptr;
        size_t len = strlen(ident);
        char *buffer = cast(char *)malloc(len + 4);   // room for the longest (4 byte) prefix
        assert(buffer);
        if (len <= 254)
        {
            // short form: one length byte, then the name
            buffer[0] = cast(char)len;
            memcpy(buffer + 1, ident, len);
            len++;
        }
        else
        {
            // long form: 0xFF, 0, then the length as two bytes (low, high), then the name
            buffer[0] = 0xFF;
            buffer[1] = 0;
            buffer[2] = cast(char)(len & 0xFF);
            buffer[3] = cast(char)(len >> 8);
            memcpy(buffer + 4, ident, len);
            len += 4;
        }
        cdb.genasm(buffer, cast(uint)len);         // append func name
        free(buffer);
    }
    else
    {
        char [IDMAX+IDOHD+1] name = void;
        size_t len = objmod.mangle(funcsym_p,name.ptr);
        assert(len < name.length);
        cdb.genasm(name.ptr,len);             // append func name
    }
    *regsaved = s.Sregsaved;
}
}
3616 
/******************************
 * Generate special varargs prolog for Posix 64 bit systems.
 * Params:
 *      cdb = sink for generated code
 *      sv = symbol for __va_argsave
 *      namedargs = registers that named parameters (not ... arguments) were passed in.
 */
void prolog_genvarargs(ref CodeBuilder cdb, Symbol* sv, regm_t namedargs)
{
    /* Generate code to move any arguments passed in registers into
     * the stack variable __va_argsave,
     * so we can reference it via pointers through va_arg().
     *   struct __va_argsave_t {
     *     size_t[6] regs;
     *     real[8] fpregs;
     *     uint offset_regs;
     *     uint offset_fpregs;
     *     void* stack_args;
     *     void* reg_args;
     *   }
     * The MOVAPS instructions seg fault if data is not aligned on
     * 16 bytes, so this gives us a nice check to ensure no mistakes.
        MOV     voff+0*8[RBP],RDI
        MOV     voff+1*8[RBP],RSI
        MOV     voff+2*8[RBP],RDX
        MOV     voff+3*8[RBP],RCX
        MOV     voff+4*8[RBP],R8
        MOV     voff+5*8[RBP],R9
        MOVZX   EAX,AL                      // AL = 0..8, # of XMM registers used
        SHL     EAX,2                       // 4 bytes for each MOVAPS
        LEA     R11,offset L2[RIP]
        SUB     R11,RAX
        LEA     RAX,voff+6*8+0x7F[RBP]
        JMP     R11
        MOVAPS  -0x0F[RAX],XMM7             // only save XMM registers if actually used
        MOVAPS  -0x1F[RAX],XMM6
        MOVAPS  -0x2F[RAX],XMM5
        MOVAPS  -0x3F[RAX],XMM4
        MOVAPS  -0x4F[RAX],XMM3
        MOVAPS  -0x5F[RAX],XMM2
        MOVAPS  -0x6F[RAX],XMM1
        MOVAPS  -0x7F[RAX],XMM0
      L2:
        MOV     1[RAX],offset_regs          // set __va_argsave.offset_regs
        MOV     5[RAX],offset_fpregs        // set __va_argsave.offset_fpregs
        LEA     R11, Para.size+Para.offset[RBP]
        MOV     9[RAX],R11                  // set __va_argsave.stack_args
        SUB     RAX,6*8+0x7F                // point to start of __va_argsave
        MOV     6*8+8*16+4+4+8[RAX],RAX     // set __va_argsave.reg_args
    * RAX and R11 are destroyed.
    * When there is no frame (or enforcealign is set) the __va_argsave accesses
    * are made relative to RSP, with a SIB byte, instead of RBP.
    */

    /* Save registers into the voff area on the stack
     */
    targ_size_t voff = Auto.size + BPoff + sv.Soffset;  // EBP offset of start of sv
    const int vregnum = 6;

    static immutable ubyte[vregnum] regs = [ DI,SI,DX,CX,R8,R9 ];

    if (!hasframe || enforcealign)
        voff += EBPtoESP;               // convert to an ESP relative offset

    for (int i = 0; i < vregnum; i++)
    {
        uint r = regs[i];
        if (!(mask(r) & namedargs))  // unnamed arguments would be the ... ones
        {
            // MOV voff+i*8[RBP],r   or   MOV voff+i*8[RSP],r
            uint ea = (REX_W << 16) | modregxrm(2,r,BPRM);
            if (!hasframe || enforcealign)
                ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,r,4);
            cdb.genc1(0x89,ea,FLconst,voff + i*8);
        }
    }

    genregs(cdb,0x0FB6,AX,AX);                 // MOVZX EAX,AL
    cdb.genc2(0xC1,modregrm(3,4,AX),2);                     // SHL EAX,2
    int raxoff = cast(int)(voff+6*8+0x7F);
    /* L2offset is the distance from the end of the LEA R11 instruction to L2.
     * It is hard-coded, so it must match the encoded sizes of the instructions
     * in between; the larger value is for a 32 bit displacement on LEA RAX.
     * NOTE(review): only large negative displacements select the larger value;
     * confirm that a positive raxoff above 0x7F (RSP relative addressing)
     * cannot also require the 32 bit displacement form.
     */
    uint L2offset = (raxoff < -0x7F) ? 0x2D : 0x2A;
    if (!hasframe || enforcealign)
        L2offset += 1;                                      // +1 for sib byte
    // LEA R11,offset L2[RIP]
    cdb.genc1(LEA,(REX_W << 16) | modregxrm(0,R11,5),FLconst,L2offset);
    genregs(cdb,0x29,AX,R11);                  // SUB R11,RAX
    code_orrex(cdb.last(), REX_W);
    // LEA RAX,voff+6*8+0x7F[RBP]
    uint ea = (REX_W << 16) | modregrm(2,AX,BPRM);
    if (!hasframe || enforcealign)
        // add sib byte for [RSP] addressing
        ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,AX,4);
    cdb.genc1(LEA,ea,FLconst,raxoff);
    cdb.gen2(0xFF,modregrmx(3,4,R11));                      // JMP R11
    for (int i = 0; i < 8; i++)
    {
        // MOVAPS -15-16*i[RAX],XMM7-i
        // NOTE(review): the XMM register number is passed unreduced as the reg
        // field; presumably its high bits land in the mod field - confirm
        // against modregrm() and the value of XMM0.
        cdb.genc1(0x0F29,modregrm(0,XMM7-i,0),FLconst,-15-16*i);
    }

    /* Compute offset_regs and offset_fpregs
     */
    uint offset_regs = 0;
    uint offset_fpregs = vregnum * 8;
    for (int i = AX; i <= XMM7; i++)
    {
        regm_t m = mask(i);
        if (m & namedargs)
        {
            if (m & (mDI|mSI|mDX|mCX|mR8|mR9))
                offset_regs += 8;
            else if (m & XMMREGS)
                offset_fpregs += 16;
            namedargs &= ~m;
            if (!namedargs)
                break;
        }
    }
    // MOV 1[RAX],offset_regs
    cdb.genc(0xC7,modregrm(2,0,AX),FLconst,1,FLconst,offset_regs);

    // MOV 5[RAX],offset_fpregs
    cdb.genc(0xC7,modregrm(2,0,AX),FLconst,5,FLconst,offset_fpregs);

    // LEA R11, Para.size+Para.offset[RBP]
    Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1);
    targ_size_t stackargs = Para.size + Para.offset;
    ea = modregxrm(2,R11,BPRM);
    if (!hasframe)
    {
        // No frame pointer: address the parameters relative to RSP instead.
        // The destination must still be R11 (it is stored by the next
        // instruction), and the offset must be converted from EBP relative
        // to ESP relative.
        ea = (modregrm(0,4,SP) << 8) | modregxrm(2,R11,4);
        stackargs += EBPtoESP;
    }
    cdb.genc1(LEA,(REX_W << 16) | ea,FLconst,stackargs);

    // MOV 9[RAX],R11
    cdb.genc1(0x89,(REX_W << 16) | modregxrm(2,R11,AX),FLconst,9);

    // SUB RAX,6*8+0x7F             // point to start of __va_argsave
    cdb.genc2(0x2D,0,6*8+0x7F);
    code_orrex(cdb.last(), REX_W);

    // MOV 6*8+8*16+4+4+8[RAX],RAX  // set __va_argsave.reg_args
    cdb.genc1(0x89,(REX_W << 16) | modregrm(2,AX,AX),FLconst,6*8+8*16+4+4+8);

    pinholeopt(cdb.peek(), null);
    useregs(mAX|mR11);
}
3759 
/******************************
 * Win64 varargs prolog hook.
 *
 * The body is empty: nothing is appended to `cdb`. The register-to-stack
 * copies sketched in the comment below are emitted by prolog_loadparams()
 * for Win64 variadic functions.
 * Params:
 *      cdb = sink for generated code (not used)
 */
void prolog_gen_win64_varargs(ref CodeBuilder cdb)
{
    /* The Microsoft scheme.
     * http://msdn.microsoft.com/en-US/library/dd2wa36c(v=vs.80)
     * Copy registers onto stack.
         mov     8[RSP],RCX
         mov     010h[RSP],RDX
         mov     018h[RSP],R8
         mov     020h[RSP],R9
     */
}
3771 
/************************************
 * Generate the part of the prolog that moves incoming parameters to the
 * locations the function body expects. The symbol table is walked three times:
 *  1. register parameters (SCfastpar/SCshadowreg) that were not assigned a
 *     register are stored into their stack locations;
 *  2. register parameters that were assigned a register are moved into it;
 *  3. stack parameters (SCregpar/SCparameter) that are enregistered are loaded
 *     from the stack, when refparam is set or (MARS) the type is volatile.
 * Between passes 1 and 2, Win64 variadic functions additionally store
 * CX,DX,R8,R9 into the stack for every slot pass 1 did not already visit.
 * Params:
 *      cdb = generated code sink
 *      tyf = what's the type of the function
 *      pushalloc = use PUSH to allocate on the stack rather than subtracting from SP
 *      namedargs = set to the registers that named parameters were passed in
 */
void prolog_loadparams(ref CodeBuilder cdb, tym_t tyf, bool pushalloc, out regm_t namedargs)
{
    //printf("prolog_loadparams()\n");
    // Debug builds only: with debugr set, list each register parameter and where it lives
    debug
    for (SYMIDX si = 0; si < globsym.top; si++)
    {
        Symbol *s = globsym.tab[si];
        if (debugr && (s.Sclass == SCfastpar || s.Sclass == SCshadowreg))
        {
            printf("symbol '%s' is fastpar in register [%s,%s]\n", s.Sident.ptr,
                regm_str(mask(s.Spreg)),
                (s.Spreg2 == NOREG ? "NOREG" : regm_str(mask(s.Spreg2))));
            if (s.Sfl == FLreg)
                printf("\tassigned to register %s\n", regm_str(mask(s.Sreglsw)));
        }
    }

    // Register whose store is skipped below when pushalloc is set
    uint pushallocreg = (tyf == TYmfunc) ? CX : AX;

    /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were not assigned
     * registers into their stack locations.
     */
    regm_t shadowregm = 0;      // parameter registers visited by this pass
    for (SYMIDX si = 0; si < globsym.top; si++)
    {
        Symbol *s = globsym.tab[si];
        uint sz = cast(uint)type_size(s.Stype);

        if ((s.Sclass == SCfastpar || s.Sclass == SCshadowreg) && s.Sfl != FLreg)
        {   // Argument is passed in a register

            type *t = s.Stype;      // type used to pick the store for the first register
            type *t2 = null;        // if set, type used for the second register

            tym_t tyb = tybasic(t.Tty);

            // This logic is same as FuncParamRegs_alloc function at src/dmd/backend/cod1.d
            //
            // Treat array of 1 the same as its element type
            // (Don't put volatile parameters in registers)
            if (tyb == TYarray && t.Tdim == 1 && !(t.Tty & mTYvolatile))
            {
                t = t.Tnext;
                tyb = tybasic(t.Tty);
            }

            // If struct just wraps another type
            if (tyb == TYstruct)
            {
                // On windows 64 bits, structs occupy a general purpose register,
                // regardless of the struct size or the number & types of its fields.
                if (config.exe != EX_WIN64)
                {
                    type *targ1 = t.Ttag.Sstruct.Sarg1type;
                    t2 = t.Ttag.Sstruct.Sarg2type;
                    if (targ1)
                        t = targ1;
                }
            }

            if (Symbol_Sisdead(s, anyiasm))
            {
                // Ignore it, as it is never referenced
            }
            else
            {
                // Compute the stack offset of the symbol; shadow registers are
                // placed relative to Para.size rather than Fast.size + BPoff
                targ_size_t offset = Fast.size + BPoff;
                if (s.Sclass == SCshadowreg)
                    offset = Para.size;
                offset += s.Soffset;
                if (!hasframe || (enforcealign && s.Sclass != SCshadowreg))
                    offset += EBPtoESP;     // will be addressed via ESP

                reg_t preg = s.Spreg;
                for (int i = 0; i < 2; ++i)     // twice, once for each possible parameter register
                {
                    shadowregm |= mask(preg);
                    opcode_t op = 0x89;                  // MOV x[EBP],preg
                    if (isXMMreg(preg))
                        op = xmmstore(t.Tty);
                    // With pushalloc, the store of pushallocreg is skipped unless the
                    // symbol is a shadow register (presumably the PUSH that allocated
                    // the stack already stored it - confirm in the frame setup code)
                    if (!(pushalloc && preg == pushallocreg) || s.Sclass == SCshadowreg)
                    {
                        if (hasframe && (!enforcealign || s.Sclass == SCshadowreg))
                        {
                            // MOV x[EBP],preg
                            cdb.genc1(op,modregxrm(2,preg,BPRM),FLconst,offset);
                            if (isXMMreg(preg))
                            {
                                checkSetVex(cdb.last(), t.Tty);
                            }
                            else
                            {
                                //printf("%s Fast.size = %d, BPoff = %d, Soffset = %d, sz = %d\n",
                                //         s.Sident, (int)Fast.size, (int)BPoff, (int)s.Soffset, (int)sz);
                                if (I64 && sz > 4)
                                    code_orrex(cdb.last(), REX_W);
                            }
                        }
                        else
                        {
                            // MOV offset[ESP],preg
                            // BUG: byte size?
                            cdb.genc1(op,
                                      (modregrm(0,4,SP) << 8) |
                                       modregxrm(2,preg,4),FLconst,offset);
                            if (isXMMreg(preg))
                            {
                                checkSetVex(cdb.last(), t.Tty);
                            }
                            else
                            {
                                if (I64 && sz > 4)
                                    cdb.last().Irex |= REX_W;
                            }
                        }
                    }
                    // Advance to the second parameter register, if there is one
                    preg = s.Spreg2;
                    if (preg == NOREG)
                        break;
                    if (t2)
                        t = t2;
                    offset += REGSIZE;
                }
            }
        }
    }

    if (config.exe == EX_WIN64 && variadic(funcsym_p.Stype))
    {
        /* The Microsoft scheme.
         * http://msdn.microsoft.com/en-US/library/dd2wa36c(v=vs.80)
         * Copy registers onto stack.
             mov     8[RSP],RCX or XMM0
             mov     010h[RSP],RDX or XMM1
             mov     018h[RSP],R8 or XMM2
             mov     020h[RSP],R9 or XMM3
         */
        static immutable reg_t[4] vregs = [ CX,DX,R8,R9 ];
        for (int i = 0; i < vregs.length; ++i)
        {
            uint preg = vregs[i];
            uint offset = cast(uint)(Para.size + i * REGSIZE);
            // Skip slot i if the previous pass visited its general purpose or XMM register
            if (!(shadowregm & (mask(preg) | mask(XMM0 + i))))
            {
                if (hasframe)
                {
                    // MOV x[EBP],preg
                    cdb.genc1(0x89,
                                     modregxrm(2,preg,BPRM),FLconst, offset);
                    code_orrex(cdb.last(), REX_W);
                }
                else
                {
                    // MOV offset[ESP],preg
                    cdb.genc1(0x89,
                                     (modregrm(0,4,SP) << 8) |
                                     modregxrm(2,preg,4),FLconst,offset + EBPtoESP);
                }
                // Set REX_W on the store just generated, whichever branch made it
                // (the hasframe branch has already requested it via code_orrex)
                cdb.last().Irex |= REX_W;
            }
        }
    }

    /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were assigned registers
     * into their assigned registers.
     * Note that we have a big problem if Pa is passed in R1 and assigned to R2,
     * and Pb is passed in R2 but assigned to R1. Detect it and assert.
     */
    regm_t assignregs = 0;      // destination registers written so far
    for (SYMIDX si = 0; si < globsym.top; si++)
    {
        Symbol *s = globsym.tab[si];
        uint sz = cast(uint)type_size(s.Stype);

        // Every register parameter contributes to namedargs, assigned a register or not
        if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
            namedargs |= s.Spregm();

        if ((s.Sclass == SCfastpar || s.Sclass == SCshadowreg) && s.Sfl == FLreg)
        {   // Argument is passed in a register

            type *t = s.Stype;
            type *t2 = null;
            if (tybasic(t.Tty) == TYstruct && config.exe != EX_WIN64)
            {   type *targ1 = t.Ttag.Sstruct.Sarg1type;
                t2 = t.Ttag.Sstruct.Sarg2type;
                if (targ1)
                    t = targ1;
            }

            reg_t preg = s.Spreg;       // register the parameter arrives in
            reg_t r = s.Sreglsw;        // register it was assigned to
            for (int i = 0; i < 2; ++i)
            {
                if (preg == NOREG)
                    break;
                assert(!(mask(preg) & assignregs));         // not already stepped on
                assignregs |= mask(r);

                // MOV reg,preg
                if (r == preg)
                {
                    // already in the right register, nothing to generate
                }
                else if (mask(preg) & XMMREGS)
                {
                    const op = xmmload(t.Tty);      // MOVSS/D xreg,preg
                    uint xreg = r - XMM0;
                    cdb.gen2(op,modregxrmx(3,xreg,preg - XMM0));
                }
                else
                {
                    //printf("test1 mov %s, %s\n", regstring[r], regstring[preg]);
                    genmovreg(cdb,r,preg);
                    if (I64 && sz == 8)
                        code_orrex(cdb.last(), REX_W);
                }
                // Second half of the parameter: second source register, msw destination
                preg = s.Spreg2;
                r = s.Sregmsw;
                if (t2)
                    t = t2;
            }
        }
    }

    /* For parameters that were passed on the stack, but are enregistered,
     * initialize the registers with the parameter stack values.
     * Do not use assignaddr(), as it will replace the stack reference with
     * the register.
     */
    for (SYMIDX si = 0; si < globsym.top; si++)
    {
        Symbol *s = globsym.tab[si];
        uint sz = cast(uint)type_size(s.Stype);

        if ((s.Sclass == SCregpar || s.Sclass == SCparameter) &&
            s.Sfl == FLreg &&
            (refparam
                // This variable has been reference by a nested function
                || MARS && s.Stype.Tty & mTYvolatile
                ))
        {
            // MOV reg,param[BP]
            //assert(refparam);
            if (mask(s.Sreglsw) & XMMREGS)
            {
                const op = xmmload(s.Stype.Tty);  // MOVSS/D xreg,mem
                uint xreg = s.Sreglsw - XMM0;
                cdb.genc1(op,modregxrm(2,xreg,BPRM),FLconst,Para.size + s.Soffset);
                if (!hasframe)
                {   // Convert to ESP relative address rather than EBP
                    code *c = cdb.last();
                    c.Irm = cast(ubyte)modregxrm(2,xreg,4);
                    c.Isib = modregrm(0,4,SP);
                    c.IEV1.Vpointer += EBPtoESP;
                }
            }
            else
            {
                // Opcode 0x8A is used for a 1 byte parameter, 0x8B otherwise
                cdb.genc1(sz == 1 ? 0x8A : 0x8B,
                    modregxrm(2,s.Sreglsw,BPRM),FLconst,Para.size + s.Soffset);
                code *c = cdb.last();
                if (!I16 && sz == SHORTSIZE)
                    c.Iflags |= CFopsize; // operand size
                if (I64 && sz >= REGSIZE)
                    c.Irex |= REX_W;
                if (I64 && sz == 1 && s.Sreglsw >= 4)
                    c.Irex |= REX;
                if (!hasframe)
                {   // Convert to ESP relative address rather than EBP
                    assert(!I16);
                    c.Irm = cast(ubyte)modregxrm(2,s.Sreglsw,4);
                    c.Isib = modregrm(0,4,SP);
                    c.IEV1.Vpointer += EBPtoESP;
                }
                if (sz > REGSIZE)
                {
                    // Parameter spans two registers: load the upper part into Sregmsw
                    cdb.genc1(0x8B,
                        modregxrm(2,s.Sregmsw,BPRM),FLconst,Para.size + s.Soffset + REGSIZE);
                    code *cx = cdb.last();
                    if (I64)
                        cx.Irex |= REX_W;
                    if (!hasframe)
                    {   // Convert to ESP relative address rather than EBP
                        assert(!I16);
                        cx.Irm = cast(ubyte)modregxrm(2,s.Sregmsw,4);
                        cx.Isib = modregrm(0,4,SP);
                        cx.IEV1.Vpointer += EBPtoESP;
                    }
                }
            }
        }
    }
}
4071 
4072 /*******************************
4073  * Generate and return function epilog.
4074  * Output:
4075  *      retsize         Size of function epilog
4076  */
4077 
4078 void epilog(block *b)
4079 {
4080     code *cpopds;
4081     reg_t reg;
4082     reg_t regx;                      // register that's not a return reg
4083     regm_t topop,regm;
4084     targ_size_t xlocalsize = localsize;
4085 
4086     CodeBuilder cdbx; cdbx.ctor();
4087     tym_t tyf = funcsym_p.ty();
4088     tym_t tym = tybasic(tyf);
4089     bool farfunc = tyfarfunc(tym) != 0;
4090     if (!(b.Bflags & BFLepilog))       // if no epilog code
4091         goto Lret;                      // just generate RET
4092     regx = (b.BC == BCret) ? AX : CX;
4093 
4094     retsize = 0;
4095 
4096     if (tyf & mTYnaked)                 // if no prolog/epilog
4097         return;
4098 
4099     if (tym == TYifunc)
4100     {
4101         static immutable ubyte[5] ops2 = [ 0x07,0x1F,0x61,0xCF,0 ];
4102         static immutable ubyte[12] ops0 = [ 0x07,0x1F,0x5F,0x5E,
4103                                         0x5D,0x5B,0x5B,0x5A,
4104                                         0x59,0x58,0xCF,0 ];
4105 
4106         genregs(cdbx,0x8B,SP,BP);              // MOV SP,BP
4107         auto p = (config.target_cpu >= TARGET_80286) ? ops2.ptr : ops0.ptr;
4108         do
4109             cdbx.gen1(*p);
4110         while (*++p);
4111         goto Lopt;
4112     }
4113 
4114     if (config.flags & CFGtrace &&
4115         (!(config.flags4 & CFG4allcomdat) ||
4116          funcsym_p.Sclass == SCcomdat ||
4117          funcsym_p.Sclass == SCglobal ||
4118          (config.flags2 & CFG2comdat && SymInline(funcsym_p))
4119         )
4120        )
4121     {
4122         Symbol *s = getRtlsym(farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N);
4123         makeitextern(s);
4124         cdbx.gencs(I16 ? 0x9A : CALL,0,FLfunc,s);      // CALLF _trace
4125         if (!I16)
4126             code_orflag(cdbx.last(),CFoff | CFselfrel);
4127         useregs((ALLREGS | mBP | mES) & ~s.Sregsaved);
4128     }
4129 
4130     if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.exe == EX_WIN32 || MARS))
4131     {
4132         nteh_epilog(cdbx);
4133     }
4134 
4135     cpopds = null;
4136     if (tyf & mTYloadds)
4137     {
4138         cdbx.gen1(0x1F);             // POP DS
4139         cpopds = cdbx.last();
4140     }
4141 
4142     /* Pop all the general purpose registers saved on the stack
4143      * by the prolog code. Remember to do them in the reverse
4144      * order they were pushed.
4145      */
4146     topop = fregsaved & ~mfuncreg;
4147     epilog_restoreregs(cdbx, topop);
4148 
4149     version (MARS)
4150     {
4151         if (usednteh & NTEHjmonitor)
4152         {
4153             regm_t retregs = 0;
4154             if (b.BC == BCretexp)
4155                 retregs = regmask(b.Belem.Ety, tym);
4156             nteh_monitor_epilog(cdbx,retregs);
4157             xlocalsize += 8;
4158         }
4159     }
4160 
4161     if (config.wflags & WFwindows && farfunc)
4162     {
4163         int wflags = config.wflags;
4164         if (wflags & WFreduced && !(tyf & mTYexport))
4165         {   // reduced prolog/epilog for non-exported functions
4166             wflags &= ~(WFdgroup | WFds | WFss);
4167             if (!(wflags & WFsaveds))
4168                 goto L4;
4169         }
4170 
4171         if (localsize)
4172         {
4173             cdbx.genc1(LEA,modregrm(1,SP,6),FLconst,cast(targ_uns)-2); /* LEA SP,-2[BP] */
4174         }
4175         if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
4176         {
4177             if (cpopds)
4178                 cpopds.Iop = NOP;              // don't need previous one
4179             cdbx.gen1(0x1F);                    // POP DS
4180         }
4181         cdbx.gen1(0x58 + BP);                   // POP BP
4182         if (config.wflags & WFincbp)
4183             cdbx.gen1(0x48 + BP);               // DEC BP
4184         assert(hasframe);
4185     }
4186     else
4187     {
4188         if (needframe || (xlocalsize && hasframe))
4189         {
4190         L4:
4191             assert(hasframe);
4192             if (xlocalsize || enforcealign)
4193             {
4194                 if (config.flags2 & CFG2stomp)
4195                 {   /*   MOV  ECX,0xBEAF
4196                      * L1:
4197                      *   MOV  [ESP],ECX
4198                      *   ADD  ESP,4
4199                      *   CMP  EBP,ESP
4200                      *   JNE  L1
4201                      *   POP  EBP
4202                      */
4203                     /* Value should be:
4204                      * 1. != 0 (code checks for null pointers)
4205                      * 2. be odd (to mess up alignment)
4206                      * 3. fall in first 64K (likely marked as inaccessible)
4207                      * 4. be a value that stands out in the debugger
4208                      */
4209                     assert(I32 || I64);
4210                     targ_size_t value = 0x0000BEAF;
4211                     reg_t regcx = CX;
4212                     mfuncreg &= ~mask(regcx);
4213                     uint grex = I64 ? REX_W << 16 : 0;
4214                     cdbx.genc2(0xC7,grex | modregrmx(3,0,regcx),value);   // MOV regcx,value
4215                     cdbx.gen2sib(0x89,grex | modregrm(0,regcx,4),modregrm(0,4,SP)); // MOV [ESP],regcx
4216                     code *c1 = cdbx.last();
4217                     cdbx.genc2(0x81,grex | modregrm(3,0,SP),REGSIZE);     // ADD ESP,REGSIZE
4218                     genregs(cdbx,0x39,SP,BP);                             // CMP EBP,ESP
4219                     if (I64)
4220                         code_orrex(cdbx.last(),REX_W);
4221                     genjmp(cdbx,JNE,FLcode,cast(block *)c1);                  // JNE L1
4222                     // explicitly mark as short jump, needed for correct retsize calculation (Bugzilla 15779)
4223                     cdbx.last().Iflags &= ~CFjmp16;
4224                     cdbx.gen1(0x58 + BP);                                 // POP BP
4225                 }
4226                 else if (config.exe == EX_WIN64)
4227                 {   // See http://msdn.microsoft.com/en-us/library/tawsa7cb(v=vs.80).aspx
4228                     // LEA RSP,0[RBP]
4229                     cdbx.genc1(LEA,(REX_W<<16)|modregrm(2,SP,BPRM),FLconst,0);
4230                     cdbx.gen1(0x58 + BP);      // POP RBP
4231                 }
4232                 else if (config.target_cpu >= TARGET_80286 &&
4233                     !(config.target_cpu >= TARGET_80386 && config.flags4 & CFG4speed)
4234                    )
4235                     cdbx.gen1(0xC9);           // LEAVE
4236                 else if (0 && xlocalsize == REGSIZE && Alloca.size == 0 && I32)
4237                 {   // This doesn't work - I should figure out why
4238                     mfuncreg &= ~mask(regx);
4239                     cdbx.gen1(0x58 + regx);    // POP regx
4240                     cdbx.gen1(0x58 + BP);      // POP BP
4241                 }
4242                 else
4243                 {
4244                     genregs(cdbx,0x8B,SP,BP);  // MOV SP,BP
4245                     if (I64)
4246                         code_orrex(cdbx.last(), REX_W);   // MOV RSP,RBP
4247                     cdbx.gen1(0x58 + BP);      // POP BP
4248                 }
4249             }
4250             else
4251                 cdbx.gen1(0x58 + BP);          // POP BP
4252             if (config.wflags & WFincbp && farfunc)
4253                 cdbx.gen1(0x48 + BP);              // DEC BP
4254         }
4255         else if (xlocalsize == REGSIZE && (!I16 || b.BC == BCret))
4256         {
4257             mfuncreg &= ~mask(regx);
4258             cdbx.gen1(0x58 + regx);                    // POP regx
4259         }
4260         else if (xlocalsize)
4261             cod3_stackadj(cdbx, cast(int)-xlocalsize);
4262     }
4263     if (b.BC == BCret || b.BC == BCretexp)
4264     {
4265 Lret:
4266         opcode_t op = tyfarfunc(tym) ? 0xCA : 0xC2;
4267         if (tym == TYhfunc)
4268         {
4269             cdbx.genc2(0xC2,0,4);                       // RET 4
4270         }
4271         else if (!typfunc(tym) ||                       // if caller cleans the stack
4272                  config.exe == EX_WIN64 ||
4273                  Para.offset == 0)                      // or nothing pushed on the stack anyway
4274         {
4275             op++;                                       // to a regular RET
4276             cdbx.gen1(op);
4277         }
4278         else
4279         {   // Stack is always aligned on register size boundary
4280             Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1);
4281             if (Para.offset >= 0x10000)
4282             {
4283                 /*
4284                     POP REG
4285                     ADD ESP, Para.offset
4286                     JMP REG
4287                 */
4288                 cdbx.gen1(0x58+regx);
4289                 cdbx.genc2(0x81, modregrm(3,0,SP), Para.offset);
4290                 if (I64)
4291                     code_orrex(cdbx.last(), REX_W);
4292                 cdbx.genc2(0xFF, modregrm(3,4,regx), 0);
4293                 if (I64)
4294                     code_orrex(cdbx.last(), REX_W);
4295             }
4296             else
4297                 cdbx.genc2(op,0,Para.offset);          // RET Para.offset
4298         }
4299     }
4300 
4301 Lopt:
4302     // If last instruction in ce is ADD SP,imm, and first instruction
4303     // in c sets SP, we can dump the ADD.
4304     CodeBuilder cdb; cdb.ctor();
4305     cdb.append(b.Bcode);
4306     code *cr = cdb.last();
4307     code *c = cdbx.peek();
4308     if (cr && c && !I64)
4309     {
4310         if (cr.Iop == 0x81 && cr.Irm == modregrm(3,0,SP))     // if ADD SP,imm
4311         {
4312             if (
4313                 c.Iop == 0xC9 ||                                  // LEAVE
4314                 (c.Iop == 0x8B && c.Irm == modregrm(3,SP,BP)) || // MOV SP,BP
4315                 (c.Iop == LEA && c.Irm == modregrm(1,SP,6))     // LEA SP,-imm[BP]
4316                )
4317                 cr.Iop = NOP;
4318             else if (c.Iop == 0x58 + BP)                       // if POP BP
4319             {
4320                 cr.Iop = 0x8B;
4321                 cr.Irm = modregrm(3,SP,BP);                    // MOV SP,BP
4322             }
4323         }
4324         else
4325         {
4326 static if (0)
4327 {
4328         // These optimizations don't work if the called function
4329         // cleans off the stack.
4330         if (c.Iop == 0xC3 && cr.Iop == CALL)     // CALL near
4331         {
4332             cr.Iop = 0xE9;                             // JMP near
4333             c.Iop = NOP;
4334         }
4335         else if (c.Iop == 0xCB && cr.Iop == 0x9A)     // CALL far
4336         {
4337             cr.Iop = 0xEA;                             // JMP far
4338             c.Iop = NOP;
4339         }
4340 }
4341         }
4342     }
4343 
4344     pinholeopt(c, null);
4345     retsize += calcblksize(c);          // compute size of function epilog
4346     cdb.append(cdbx);
4347     b.Bcode = cdb.finish();
4348 }
4349 
/*******************************
 * Return offset of SP from BP.
 * Returns:
 *      the sum of the globals spoff and localsize
 */

targ_size_t cod3_spoff()
{
    //printf("spoff = x%x, localsize = x%x\n", (int)spoff, (int)localsize);
    const targ_size_t total = localsize + spoff;
    return total;
}
4359 
/****************************
 * Generate the move between the register(s) assigned to symbol s and the
 * memory operand that getlvalue() builds for s.
 * Params:
 *      cdb =   code builder the generated instruction(s) are appended to
 *      s =     symbol to move; Sreglsw is always used, Sregmsw as well
 *              when the symbol's type is larger than REGSIZE
 *      toreg = true to load the register from memory,
 *              false to store the register to memory
 */
void gen_spill_reg(ref CodeBuilder cdb, Symbol* s, bool toreg)
{
    const regm_t keep = toreg ? RMload : RMstore;
    code ins;

    // A variable elem for s lets getlvalue() do the addressing-mode work
    elem* var = el_var(s);

    if (mask(s.Sreglsw) & XMMREGS)
    {
        // XMM register: MOVSS/D xreg,mem to load, MOVSS/D mem,xreg to store
        ins.Iop = toreg ? xmmload(s.Stype.Tty) : xmmstore(s.Stype.Tty);
        getlvalue(cdb, &ins, var, keep);
        ins.orReg(s.Sreglsw - XMM0);
        cdb.gen(&ins);

        el_free(var);
        return;
    }

    const int size = cast(int)type_size(s.Stype);

    // MOV reg,mem[ESP] when loading, MOV mem[ESP],reg when storing
    ins.Iop = toreg ? 0x8B : 0x89;
    if (size == 1)
        ins.Iop ^= 1;                   // flip bit 0: 0x8B -> 0x8A, 0x89 -> 0x88

    getlvalue(cdb, &ins, var, keep);
    ins.orReg(s.Sreglsw);
    if (I64 && size == 1 && s.Sreglsw >= 4)
        ins.Irex |= REX;

    // A reg,reg form whose two register fields (including the REX_R and
    // REX_B bits) are equal would be MOV reg,reg, so it is not emitted.
    const bool lswIsSelfMove =
        (ins.Irm & 0xC0) == 0xC0 &&
        (((ins.Irm >> 3) ^ ins.Irm) & 7) == 0 &&
        (((ins.Irex >> 2) ^ ins.Irex) & 1) == 0;
    if (!lswIsSelfMove)
        cdb.gen(&ins);

    if (size > REGSIZE)
    {
        // Second move for the most significant part, using Sregmsw
        ins.setReg(s.Sregmsw);
        getlvalue_msw(&ins);

        const bool mswIsSelfMove =
            (ins.Irm & 0xC0) == 0xC0 &&
            (((ins.Irm >> 3) ^ ins.Irm) & 7) == 0 &&
            (((ins.Irex >> 2) ^ ins.Irex) & 1) == 0;
        if (!mswIsSelfMove)
            cdb.gen(&ins);
    }

    el_free(var);
}
4407 
/****************************
 * Generate code for, and output, a thunk.
 *
 * The thunk adds d to the 'this' pointer and then either jumps directly
 * to sfunc, or loads the vptr found at offset d2 from 'this' and jumps
 * through the vtbl[] entry at offset i.
 * Params:
 *      sthunk =  Symbol of thunk
 *      sfunc =   Symbol of thunk's target function
 *      p =       ESP parameter offset to this pointer
 *      thisty =  Type of this pointer
 *      d =       offset to add to 'this' pointer
 *      i =       offset into vtbl[]; when the low 16 bits are all set
 *                the thunk jumps directly to sfunc instead
 *      d2 =      offset from 'this' to vptr
 */

void cod3_thunk(Symbol *sthunk,Symbol *sfunc,uint p,tym_t thisty,
        uint d,int i,uint d2)
{
    const int seg = sthunk.Sseg;
    cod3_align(seg);

    // Step p over the return address
    const tym_t thunkty = tybasic(sthunk.ty());
    if (!tyfarfunc(thunkty))
        p += tysize(TYnptr);
    else if (I32)
        p += 8;                                 // far function
    else
        p += tysize(TYfptr);                    // far function

    CodeBuilder cdb; cdb.ctor();
    if (I16)
    {
        /*
           Generate:
            MOV BX,SP
            ADD [SS:] p[BX],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV BX, p[BX]                       BX = this
            MOV BX, d2[BX]                      BX = this.vptr
            JMP i[BX]                           jump to virtual function
         */
        genregs(cdb,0x89,SP,BX);                // MOV BX,SP
        cdb.genc(0x81,modregrm(2,0,7),
            FLconst,p,                          // to this
            FLconst,d);                         // ADD p[BX],d

        // If DS needs reloading from SS,
        // then assume SS != DS on thunk entry
        if ((config.wflags & WFssneds) ||
            (LARGEDATA && (config.wflags & WFss)))
            cdb.last().Iflags |= CFss;          // SS:
    }
    else
    {
        /*
           Generate:
            ADD p[ESP],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV EAX, p[ESP]                     EAX = this
            MOV EAX, d2[EAX]                    EAX = this.vptr
            JMP i[EAX]                          jump to virtual function
         */
        reg_t aluOp = 0;                        // /0 of opcode 0x81 is ADD
        if (cast(int)d < 0)
        {
            aluOp = 5;                          // switch from ADD to SUB
            d = -d;
        }

        const bool thisInReg = thunkty == TYmfunc ||
                               thunkty == TYjfunc ||
                               (I64 && thunkty == TYnfunc);
        if (thisInReg)
        {
            // TYmfunc always uses CX; the other register cases use
            // CX for EX_WIN64, DI for any other I64 target, else AX
            int thisReg = AX;
            if (thunkty == TYmfunc || config.exe == EX_WIN64)
                thisReg = CX;
            else if (I64)
                thisReg = DI;

            if (d)
                cdb.genc2(0x81,modregrm(3,aluOp,thisReg),d);    // ADD thisReg,d
        }
        else
        {
            cdb.genc(0x81,modregrm(2,aluOp,4),
                FLconst,p,                      // to this
                FLconst,d);                     // ADD p[ESP],d
            cdb.last().Isib = modregrm(0,4,SP);
        }

        // Nothing is generated when 'this' is in a register and d is 0
        if (I64 && cdb.peek())
            cdb.last().Irex |= REX_W;
    }

    const bool directJump = (i & 0xFFFF) == 0xFFFF;
    if (directJump)
    {
static if (0)
{
        localgot = null;                // no local variables
        code *c1 = load_localgot();
        if (c1)
        {
            assignaddrc(c1);
            cdb.append(c1);
        }
}
        if (LARGECODE)
        {
            cdb.gencs(0xEA,0,FLfunc,sfunc);             // JMP far sfunc
            cdb.last().Iflags |= CFseg | CFoff;
        }
        else
        {
            cdb.gencs(0xE9,0,FLfunc,sfunc);             // JMP sfunc
            cdb.last().Iflags |= CFselfrel | CFoff;
        }
    }
    else                                        // virtual call
    {
        // A 'this' wider than a register is treated as far, and so is the vptr
        const bool farThis = (tysize(thisty) > REGSIZE);

        assert(thisty != TYvptr);               // can't handle this case

        if (I16)
        {
            const opcode_t loadOp = farThis ? 0xC4 : 0x8B;      // LES : MOV

            // MOV/LES BX,[SS:] p[BX]
            cdb.genc1(loadOp,modregrm(2,BX,7),FLconst,cast(targ_uns) p);
            // If DS needs reloading from SS,
            // then assume SS != DS on thunk entry
            if ((config.wflags & WFssneds) ||
                (LARGEDATA && (config.wflags & WFss)))
                cdb.last().Iflags |= CFss;              // SS:

            // MOV/LES BX,[ES:]d2[BX]
            cdb.genc1(loadOp,modregrm(2,BX,7),FLconst,d2);
            if (farThis)
                cdb.last().Iflags |= CFes;              // ES:

            // JMP i[BX]
            cdb.genc1(0xFF,modregrm(2,(LARGECODE ? 5 : 4),7),FLconst,cast(targ_uns) i);
            if (farThis)
                cdb.last().Iflags |= CFes;              // ES:
        }
        else
        {
            assert(!farThis && !LARGECODE);

            // Register holding 'this' when the vptr is loaded
            int vptrBase = AX;
            if (thunkty == TYmfunc)
                vptrBase = CX;                  // 'this' is in ECX
            else if (thunkty != TYjfunc)        // TYjfunc: 'this' is already in EAX
            {
                // NOTE(review): for I64 with thunkty == TYnfunc the adjustment
                // above was applied to a register, yet 'this' is reloaded from
                // p[ESP] here - confirm that combination never reaches this path.
                // MOV EAX,p[ESP]
                cdb.genc1(0x8B,(modregrm(0,4,SP) << 8) | modregrm(2,AX,4),FLconst,cast(targ_uns) p);
                if (I64)
                    cdb.last().Irex |= REX_W;
            }

            // MOV EAX,d2[vptrBase]
            cdb.genc1(0x8B,modregrm(2,AX,vptrBase),FLconst,d2);
            if (I64)
                code_orrex(cdb.last(), REX_W);

            // JMP i[EAX]
            cdb.genc1(0xFF,modregrm(2,4,0),FLconst,cast(targ_uns) i);
        }
    }

    // Write the thunk into seg and record where it landed
    const targ_size_t thunkStart = Offset(seg);
    code *thunkCode = cdb.finish();
    pinholeopt(thunkCode,null);
    codout(seg,thunkCode);
    code_free(thunkCode);

    sthunk.Soffset = thunkStart;
    sthunk.Ssize = Offset(seg) - thunkStart;    // size of thunk
    sthunk.Sseg = seg;
    static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
    {
        objmod.pubdef(seg,sthunk,sthunk.Soffset);
    }
    static if (TARGET_WINDOS)
    {
        if (config.objfmt == OBJ_MSCOFF)
            objmod.pubdef(seg,sthunk,sthunk.Soffset);
    }
    searchfixlist(sthunk);              // resolve forward refs
}
4596 
/*****************************
 * Assume symbol s is extern.
 * If s.Sxtrnnum is still 0, set its storage class to SCextern and pass it
 * to objmod.external(); otherwise do nothing.
 */

void makeitextern(Symbol *s)
{
    if (s.Sxtrnnum != 0)
        return;

    /*printf("makeitextern(x%x)\n",s);*/
    s.Sclass = SCextern;                // external
    objmod.external(s);
}
4610 
4611 
/*******************************
 * Replace JMPs and 16-bit-flagged conditional jumps in bl.Bcode with
 * their short forms wherever possible, and delete branches whose
 * distance is 0 (branch to the next instruction).
 * The target of an FLcode jump is searched for forward from the jump
 * first, then from the start of the block.
 * When flag is 0, BFLjmpoptdone is set in bl.Bflags on entry; it is
 * cleared again if a jump remains that could not be shortened.
 * Params:
 *      bl =    block whose code is scanned
 *      flag =  !=0 means don't have correct Boffsets yet
 * Returns:
 *      number of bytes saved
 */

int branch(block *bl,int flag)
{
    code* nxt;                  // instruction after the current one
    code* target;               // target of the most recent FLcode branch
    targ_size_t dist;           // byte distance tested against the short range
    targ_size_t insSize;        // size of the current instruction
    int saved = 0;

    if (!flag)
        bl.Bflags |= BFLjmpoptdone;      // assume this will be all
    if (!bl.Bcode)
        return 0;

    targ_size_t curOffset = bl.Boffset;  // starts at the offset of the block
    for (code* cur = bl.Bcode; cur; cur = nxt)
    {
        insSize = calccodsize(cur);
        nxt = code_next(cur);
        const ubyte op = cast(ubyte)cur.Iop;

        // Candidates: Jcc (0x70..0x7F) flagged CFjmp16, or JMP not flagged CFjmp5
        const bool longJcc = (op & ~0x0F) == 0x70 && (cur.Iflags & CFjmp16) != 0;
        const bool nearJmp = op == JMP && (cur.Iflags & CFjmp5) == 0;
        if (longJcc || nearJmp)
        {
          Lretarget:
            switch (cur.IFL2)
            {
                case FLblock:
                {
                    if (flag)           // no offsets yet, don't optimize
                        goto Ladvance;

                    block* dest = cur.IEV2.Vblock;
                    dist = dest.Boffset - curOffset - insSize;

                    /* If this is a forward branch, and there is an aligned
                     * block intervening, it is possible that shrinking
                     * the jump instruction will cause it to be out of
                     * range of the target. This happens if the alignment
                     * prevents the target block from moving correspondingly
                     * closer.
                     */
                    if (dist >= 0x7F-4 && dest.Boffset > curOffset)
                    {
                        // Look for intervening alignment
                        for (block *b = bl.Bnext; b; b = b.Bnext)
                        {
                            if (b.Balign)
                            {
                                bl.Bflags &= ~BFLjmpoptdone;   // some JMPs left
                                goto Ladvance;
                            }
                            if (b == dest)
                                break;
                        }
                    }
                    break;
                }

                case FLcode:
                {
                    code* scan;

                    target = cur.IEV2.Vcode;    // target of branch
                    assert(target.Iflags & (CFtarg | CFtarg2));

                    // Forward search: add up the sizes between the branch and its target
                    dist = 0;
                    for (scan = nxt; scan && scan != target; scan = code_next(scan))
                        dist += calccodsize(scan);

                    if (!scan)
                    {
                        // Not found going forward. Walk from the start of the block
                        // and add up the sizes from the target through this branch.
                        bool counting = false;
                        dist = 0;
                        for (scan = bl.Bcode; scan != nxt; scan = code_next(scan))
                        {
                            assert(scan != null); // must have found it
                            if (scan == target)
                                counting = true;
                            if (counting)
                                dist += calccodsize(scan);
                        }
                    }

                    if ((config.flags4 & CFG4optimized) && !flag)
                    {
                        // Propagate branch forward past NOPs and line number records
                        while (target &&
                               (target.Iop == NOP ||
                                target.Iop == (ESCAPE | ESClinnum)))
                        {
                            target = code_next(target);
                        }

                        if (target)
                        {
                            cur.IEV2.Vcode = target;
                            target.Iflags |= CFtarg;

                            // And eliminate jmps to jmps
                            if ((op == target.Iop || target.Iop == JMP) &&
                                (op == JMP || (cur.Iflags & CFjmp16)))
                            {
                                cur.IFL2 = target.IFL2;
                                cur.IEV2.Vcode = target.IEV2.Vcode;
                                /*printf("eliminating branch\n");*/
                                goto Lretarget;
                            }
                        }
                    }
                    break;
                }

                default:
                    goto Ladvance;
            }

            if (dist == 0)                      // bra to next instruction
            {
                saved += insSize;
                cur.Iop = NOP;                  // del branch instruction
                cur.IEV2.Vcode = null;
                continue;                       // curOffset is not advanced here
            }

            if (cast(targ_size_t)cast(targ_schar)(dist - 2) == (dist - 2) &&
                cast(targ_size_t)cast(targ_schar)dist == dist)
            {
                if (op == JMP)
                {
                    cur.Iop = JMPS;             // JMP SHORT
                    saved += I16 ? 1 : 3;
                }
                else                            // else Jcond
                {
                    cur.Iflags &= ~CFjmp16;     // a branch is ok
                    saved += I16 ? 3 : 4;

                    // Replace a cond jump around a call to a function that
                    // never returns with a cond jump to that function.
                    if ((config.flags4 & CFG4optimized) &&
                        config.target_cpu >= TARGET_80386 &&
                        dist == (I16 ? 3 : 5) &&
                        nxt &&
                        nxt.Iop == CALL &&
                        nxt.IFL2 == FLfunc &&
                        (nxt.IEV2.Vsym.Sflags & SFLexit) &&
                        !(nxt.Iflags & (CFtarg | CFtarg2))
                       )
                    {
                        // 0x0F8x with the low condition bit inverted
                        nxt.Iop = 0x0F00 | ((cur.Iop & 0x0F) ^ 0x81);
                        cur.Iop = NOP;
                        cur.IEV2.Vcode = null;
                        saved++;

                        // If nobody else points to target, we can remove the CFtarg
                        if (flag && target)
                        {
                            code* user = bl.Bcode;
                            while (user && user.IEV2.Vcode != target)
                                user = code_next(user);
                            if (!user)
                                target.Iflags &= ~CFtarg;
                        }
                    }
                }
                insSize = calccodsize(cur);     // size after the rewrite
            }
            else
                bl.Bflags &= ~BFLjmpoptdone;    // some JMPs left
        }

      Ladvance:
        curOffset += insSize;
    }
    //printf("bytesaved = x%x\n",bytesaved);
    return saved;
}
4822 
4823 
/************************************************
 * Adjust all Soffset's of stack variables so they
 * are all relative to the frame pointer.
 * Every symbol in globsym is visited; the amount added depends on
 * its storage class. Symbols of other classes are left untouched.
 */

version (MARS)
{
void cod3_adjSymOffsets()
{
    //printf("cod3_adjSymOffsets()\n");
    foreach (idx; 0 .. globsym.top)
    {
        //printf("\tglobsym.tab[%d] = %p\n",si,globsym.tab[si]);
        Symbol* sym = globsym.tab[idx];

        switch (sym.Sclass)
        {
            case SCparameter, SCregpar, SCshadowreg:
                // offset by the size of the Para section
                sym.Soffset += Para.size;
                break;

            case SCfastpar:
                // offset by the size of the Fast section plus BPoff
                sym.Soffset += Fast.size + BPoff;
                break;

            case SCauto, SCregister:
                // placed in the Fast section if Sfl says so, else in Auto
                sym.Soffset += (sym.Sfl == FLfast ? Fast.size : Auto.size) + BPoff;
                break;

            case SCbprel:
                // no adjustment is applied to SCbprel symbols
                break;

            default:
                continue;
        }
    }
}

}
4887 
/*******************************
 * Take symbol info in union ev and replace it with a real address
 * in Vpointer.
 * Runs assignaddrc() over the code of block bl. For a block flagged
 * BFLoutsideprolog, hasframe is forced to 0 and EBPtoESP to -REGSIZE
 * for the duration of the call; both globals are restored afterwards.
 */

void assignaddr(block *bl)
{
    const savedEBPtoESP = EBPtoESP;
    const savedHasFrame = hasframe;

    const bool outsideProlog = (bl.Bflags & BFLoutsideprolog) != 0;
    if (outsideProlog)
    {
        hasframe = 0;
        EBPtoESP = -REGSIZE;
    }

    assignaddrc(bl.Bcode);

    // Put the globals back the way they were found
    EBPtoESP = savedEBPtoESP;
    hasframe = savedHasFrame;
}
4907 
4908 void assignaddrc(code *c)
4909 {
4910     int sn;
4911     Symbol *s;
4912     ubyte ins,rm;
4913     targ_size_t soff;
4914     targ_size_t base;
4915 
4916     base = EBPtoESP;
4917     for (; c; c = code_next(c))
4918     {
4919         debug
4920         {
4921         if (0)
4922         {       printf("assignaddrc()\n");
4923                 code_print(c);
4924         }
4925         if (code_next(c) && code_next(code_next(c)) == c)
4926             assert(0);
4927         }
4928 
4929         if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
4930             ins = vex_inssize(c);
4931         else if ((c.Iop & 0xFFFD00) == 0x0F3800)
4932             ins = inssize2[(c.Iop >> 8) & 0xFF];
4933         else if ((c.Iop & 0xFF00) == 0x0F00)
4934             ins = inssize2[c.Iop & 0xFF];
4935         else if ((c.Iop & 0xFF) == ESCAPE)
4936         {
4937             if (c.Iop == (ESCAPE | ESCadjesp))
4938             {
4939                 //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,(long)c.IEV1.Vint);
4940                 EBPtoESP += c.IEV1.Vint;
4941                 c.Iop = NOP;
4942             }
4943             else if (c.Iop == (ESCAPE | ESCfixesp))
4944             {
4945                 //printf("fix ESP\n");
4946                 if (hasframe)
4947                 {
4948                     // LEA ESP,-EBPtoESP[EBP]
4949                     c.Iop = LEA;
4950                     if (c.Irm & 8)
4951                         c.Irex |= REX_R;
4952                     c.Irm = modregrm(2,SP,BP);
4953                     c.Iflags = CFoff;
4954                     c.IFL1 = FLconst;
4955                     c.IEV1.Vuns = -EBPtoESP;
4956                     if (enforcealign)
4957                     {
4958                         // AND ESP, -STACKALIGN
4959                         code *cn = code_calloc();
4960                         cn.Iop = 0x81;
4961                         cn.Irm = modregrm(3, 4, SP);
4962                         cn.Iflags = CFoff;
4963                         cn.IFL2 = FLconst;
4964                         cn.IEV2.Vsize_t = -STACKALIGN;
4965                         if (I64)
4966                             c.Irex |= REX_W;
4967                         cn.next = c.next;
4968                         c.next = cn;
4969                     }
4970                 }
4971             }
4972             else if (c.Iop == (ESCAPE | ESCframeptr))
4973             {   // Convert to load of frame pointer
4974                 // c.Irm is the register to use
4975                 if (hasframe && !enforcealign)
4976                 {   // MOV reg,EBP
4977                     c.Iop = 0x89;
4978                     if (c.Irm & 8)
4979                         c.Irex |= REX_B;
4980                     c.Irm = modregrm(3,BP,c.Irm & 7);
4981                 }
4982                 else
4983                 {   // LEA reg,EBPtoESP[ESP]
4984                     c.Iop = LEA;
4985                     if (c.Irm & 8)
4986                         c.Irex |= REX_R;
4987                     c.Irm = modregrm(2,c.Irm & 7,4);
4988                     c.Isib = modregrm(0,4,SP);
4989                     c.Iflags = CFoff;
4990                     c.IFL1 = FLconst;
4991                     c.IEV1.Vuns = EBPtoESP;
4992                 }
4993             }
4994             if (I64)
4995                 c.Irex |= REX_W;
4996             continue;
4997         }
4998         else
4999             ins = inssize[c.Iop & 0xFF];
5000         if (!(ins & M) ||
5001             ((rm = c.Irm) & 0xC0) == 0xC0)
5002             goto do2;           /* if no first operand          */
5003         if (is32bitaddr(I32,c.Iflags))
5004         {
5005 
5006             if (
5007                 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
5008                )
5009                 goto do2;       /* if no first operand  */
5010         }
5011         else
5012         {
5013             if (
5014                 ((rm & 0xC0) == 0 && !((rm & 7) == 6))
5015                )
5016                 goto do2;       /* if no first operand  */
5017         }
5018         s = c.IEV1.Vsym;
5019         switch (c.IFL1)
5020         {
5021             case FLdata:
5022                 if (config.objfmt == OBJ_OMF && s.Sclass != SCcomdat)
5023                 {
5024                     version (MARS)
5025                     {
5026                         c.IEV1.Vseg = s.Sseg;
5027                     }
5028                     else
5029                     {
5030                         c.IEV1.Vseg = DATA;
5031                     }
5032                     c.IEV1.Vpointer += s.Soffset;
5033                     c.IFL1 = FLdatseg;
5034                 }
5035                 else
5036                     c.IFL1 = FLextern;
5037                 goto do2;
5038 
5039             case FLudata:
5040                 if (config.objfmt == OBJ_OMF)
5041                 {
5042                     version (MARS)
5043                     {
5044                         c.IEV1.Vseg = s.Sseg;
5045                     }
5046                     else
5047                     {
5048                         c.IEV1.Vseg = UDATA;
5049                     }
5050                     c.IEV1.Vpointer += s.Soffset;
5051                     c.IFL1 = FLdatseg;
5052                 }
5053                 else
5054                     c.IFL1 = FLextern;
5055                 goto do2;
5056 
5057             case FLtlsdata:
5058                 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
5059                     c.IFL1 = FLextern;
5060                 goto do2;
5061 
5062             case FLdatseg:
5063                 //c.IEV1.Vseg = DATA;
5064                 goto do2;
5065 
5066             case FLfardata:
5067             case FLcsdata:
5068             case FLpseudo:
5069                 goto do2;
5070 
5071             case FLstack:
5072                 //printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n",
5073                 //s.Soffset,EBPtoESP,base,c.IEV1.Vpointer);
5074                 c.IEV1.Vpointer += s.Soffset + EBPtoESP - base - EEStack.offset;
5075                 break;
5076 
5077             case FLfast:
5078                 soff = Fast.size;
5079                 goto L1;
5080 
5081             case FLreg:
5082             case FLauto:
5083                 soff = Auto.size;
5084             L1:
5085                 if (Symbol_Sisdead(s, anyiasm))
5086                 {
5087                     c.Iop = NOP;               // remove references to it
5088                     continue;
5089                 }
5090                 if (s.Sfl == FLreg && c.IEV1.Vpointer < 2)
5091                 {
5092                     reg_t reg = s.Sreglsw;
5093 
5094                     assert(!(s.Sregm & ~mask(reg)));
5095                     if (c.IEV1.Vpointer == 1)
5096                     {
5097                         assert(reg < 4);    /* must be a BYTEREGS   */
5098                         reg |= 4;           /* convert to high byte reg */
5099                     }
5100                     if (reg & 8)
5101                     {
5102                         assert(I64);
5103                         c.Irex |= REX_B;
5104                         reg &= 7;
5105                     }
5106                     c.Irm = (c.Irm & modregrm(0,7,0))
5107                             | modregrm(3,0,reg);
5108                     assert(c.Iop != LES && c.Iop != LEA);
5109                     goto do2;
5110                 }
5111                 else
5112                 {   c.IEV1.Vpointer += s.Soffset + soff + BPoff;
5113                     if (s.Sflags & SFLunambig)
5114                         c.Iflags |= CFunambig;
5115             L2:
5116                     if (!hasframe || (enforcealign && c.IFL1 != FLpara))
5117                     {   /* Convert to ESP relative address instead of EBP */
5118                         assert(!I16);
5119                         c.IEV1.Vpointer += EBPtoESP;
5120                         ubyte crm = c.Irm;
5121                         if ((crm & 7) == 4)              // if SIB byte
5122                         {
5123                             assert((c.Isib & 7) == BP);
5124                             assert((crm & 0xC0) != 0);
5125                             c.Isib = (c.Isib & ~7) | modregrm(0,0,SP);
5126                         }
5127                         else
5128                         {
5129                             assert((crm & 7) == 5);
5130                             c.Irm = (crm & modregrm(0,7,0))
5131                                     | modregrm(2,0,4);
5132                             c.Isib = modregrm(0,4,SP);
5133                         }
5134                     }
5135                 }
5136                 break;
5137 
5138             case FLpara:
5139                 soff = Para.size - BPoff;    // cancel out add of BPoff
5140                 goto L1;
5141 
5142             case FLfltreg:
5143                 c.IEV1.Vpointer += Foff + BPoff;
5144                 c.Iflags |= CFunambig;
5145                 goto L2;
5146 
5147             case FLallocatmp:
5148                 c.IEV1.Vpointer += Alloca.offset + BPoff;
5149                 goto L2;
5150 
5151             case FLfuncarg:
5152                 c.IEV1.Vpointer += cgstate.funcarg.offset + BPoff;
5153                 goto L2;
5154 
5155             case FLbprel:
5156                 c.IEV1.Vpointer += s.Soffset;
5157                 break;
5158 
5159             case FLcs:
5160                 sn = c.IEV1.Vuns;
5161                 if (!CSE.loaded(sn))            // if never loaded
5162                 {
5163                     c.Iop = NOP;
5164                     continue;
5165                 }
5166                 c.IEV1.Vpointer = CSE.offset(sn) + CSoff + BPoff;
5167                 c.Iflags |= CFunambig;
5168                 goto L2;
5169 
5170             case FLregsave:
5171                 sn = c.IEV1.Vuns;
5172                 c.IEV1.Vpointer = sn + regsave.off + BPoff;
5173                 c.Iflags |= CFunambig;
5174                 goto L2;
5175 
5176             case FLndp:
5177                 version (MARS)
5178                 {
5179                     assert(c.IEV1.Vuns < global87.save.length);
5180                 }
5181                 c.IEV1.Vpointer = c.IEV1.Vuns * tysize(TYldouble) + NDPoff + BPoff;
5182                 c.Iflags |= CFunambig;
5183                 goto L2;
5184 
5185             case FLoffset:
5186                 break;
5187 
5188             case FLlocalsize:
5189                 c.IEV1.Vpointer += localsize;
5190                 break;
5191 
5192             case FLconst:
5193             default:
5194                 goto do2;
5195         }
5196         c.IFL1 = FLconst;
5197     do2:
5198         /* Ignore TEST (F6 and F7) opcodes      */
5199         if (!(ins & T)) goto done;              /* if no second operand */
5200         s = c.IEV2.Vsym;
5201         switch (c.IFL2)
5202         {
5203             case FLdata:
5204                 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
5205                 {
5206                     c.IFL2 = FLextern;
5207                     goto do2;
5208                 }
5209                 else
5210                 {
5211                     if (s.Sclass == SCcomdat)
5212                     {   c.IFL2 = FLextern;
5213                         goto do2;
5214                     }
5215                     c.IEV2.Vseg = MARS ? s.Sseg : DATA;
5216                     c.IEV2.Vpointer += s.Soffset;
5217                     c.IFL2 = FLdatseg;
5218                     goto done;
5219                 }
5220 
5221             case FLudata:
5222                 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
5223                 {
5224                     c.IFL2 = FLextern;
5225                     goto do2;
5226                 }
5227                 else
5228                 {
5229                     c.IEV2.Vseg = MARS ? s.Sseg : UDATA;
5230                     c.IEV2.Vpointer += s.Soffset;
5231                     c.IFL2 = FLdatseg;
5232                     goto done;
5233                 }
5234 
5235             case FLtlsdata:
5236                 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
5237                 {
5238                     c.IFL2 = FLextern;
5239                     goto do2;
5240                 }
5241                 goto done;
5242 
5243             case FLdatseg:
5244                 //c.IEV2.Vseg = DATA;
5245                 goto done;
5246 
5247             case FLcsdata:
5248             case FLfardata:
5249                 goto done;
5250 
5251             case FLreg:
5252             case FLpseudo:
5253                 assert(0);
5254                 /* NOTREACHED */
5255 
5256             case FLfast:
5257                 c.IEV2.Vpointer += s.Soffset + Fast.size + BPoff;
5258                 break;
5259 
5260             case FLauto:
5261                 c.IEV2.Vpointer += s.Soffset + Auto.size + BPoff;
5262             L3:
5263                 if (!hasframe || (enforcealign && c.IFL2 != FLpara))
5264                     /* Convert to ESP relative address instead of EBP */
5265                     c.IEV2.Vpointer += EBPtoESP;
5266                 break;
5267 
5268             case FLpara:
5269                 c.IEV2.Vpointer += s.Soffset + Para.size;
5270                 goto L3;
5271 
5272             case FLfltreg:
5273                 c.IEV2.Vpointer += Foff + BPoff;
5274                 goto L3;
5275 
5276             case FLallocatmp:
5277                 c.IEV2.Vpointer += Alloca.offset + BPoff;
5278                 goto L3;
5279 
5280             case FLfuncarg:
5281                 c.IEV2.Vpointer += cgstate.funcarg.offset + BPoff;
5282                 goto L3;
5283 
5284             case FLbprel:
5285                 c.IEV2.Vpointer += s.Soffset;
5286                 break;
5287 
5288             case FLstack:
5289                 c.IEV2.Vpointer += s.Soffset + EBPtoESP - base;
5290                 break;
5291 
5292             case FLcs:
5293             case FLndp:
5294             case FLregsave:
5295                 assert(0);
5296 
5297             case FLconst:
5298                 break;
5299 
5300             case FLlocalsize:
5301                 c.IEV2.Vpointer += localsize;
5302                 break;
5303 
5304             default:
5305                 goto done;
5306         }
5307         c.IFL2 = FLconst;
5308   done:
5309         { }
5310     }
5311 }
5312 
/*******************************
 * Compute the BP-relative offset of symbol s.
 *
 * The symbol's own Soffset is biased by an amount that depends on its
 * storage kind (s.Sfl):
 *      FLpara: Para.size
 *      FLfast: Fast.size + BPoff
 *      FLauto: Auto.size + BPoff
 * Any other storage kind is reported with WRFL()/symbol_print() and
 * then trips an assert.
 * Params:
 *      s = symbol to compute the offset for
 * Returns:
 *      s.Soffset plus the bias for its storage kind
 */

targ_size_t cod3_bpoffset(Symbol *s)
{
    symbol_debug(s);

    targ_size_t bias;           // amount to add to s.Soffset
    switch (s.Sfl)
    {
        case FLauto:
            bias = Auto.size + BPoff;
            break;

        case FLfast:
            bias = Fast.size + BPoff;
            break;

        case FLpara:
            bias = Para.size;
            break;

        default:
            // not a storage kind this function handles: dump it and stop
            WRFL(cast(FL)s.Sfl);
            symbol_print(s);
            assert(0);
    }

    // checked only after the storage kind has been accepted
    assert(hasframe);
    return s.Soffset + bias;
}
5345 
5346 
/*******************************
 * Find shorter versions of the same instructions.
 * Walks the code list once, rewriting each instruction in place.
 * Does these optimizations:
 *      replaces jmps to the next instruction with NOPs
 *      sign extension of modregrm displacement
 *      sign extension of immediate data (can't do it for OR, AND, XOR
 *              as the opcodes are not defined)
 *      short versions for AX EA
 *      short versions for reg EA
 * Code is neither removed nor added; an instruction that becomes
 * redundant has its opcode set to NOP.
 * After most rewrites control jumps back to L1 so that the modified
 * instruction is examined again with its new opcode.
 * Params:
 *      c = code list to optimize
 *      b = block for code (or null)
 */

void pinholeopt(code *c,block *b)
{
    targ_size_t a;
    uint mod;
    ubyte ins;
    int usespace;
    int useopsize;
    block *bn;

    debug
    {
        // run the self test once, the first time through
        __gshared int tested; if (!tested) { tested++; pinholeopt_unittest(); }
    }

    debug
    {
        code *cstart = c;       // remember head of list for the trace at the end
        if (debugc)
        {
            printf("+pinholeopt(%p)\n",c);
        }
    }

    if (b)
    {
        bn = b.Bnext;
        // BCasm blocks are excluded from the space-saving rewrites
        usespace = (config.flags4 & CFG4space && b.BC != BCasm);
        useopsize = (I16 || (config.flags4 & CFG4space && b.BC != BCasm));
    }
    else
    {
        bn = null;
        usespace = (config.flags4 & CFG4space);
        useopsize = (I16 || config.flags4 & CFG4space);
    }
    for (; c; c = code_next(c))
    {
    L1:
        // Fetch the instruction-size flags for the (possibly rewritten) opcode
        opcode_t op = c.Iop;
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((op & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(op >> 8) & 0xFF];
        else if ((op & 0xFF00) == 0x0F00)
            ins = inssize2[op & 0xFF];
        else
            ins = inssize[op & 0xFF];
        if (ins & M)            // if modregrm byte
        {
            // shortop: true if the operand size is the 16 bit one
            int shortop = (c.Iflags & CFopsize) ? !I16 : I16;
            int local_BPRM = BPRM;

            if (c.Iflags & CFaddrsize)
                local_BPRM ^= 5 ^ 6;    // toggle between 5 and 6

            uint rm = c.Irm;
            reg_t reg = rm & modregrm(0,7,0);          // isolate reg field
            reg_t ereg = rm & 7;
            //printf("c = %p, op = %02x rm = %02x\n", c, op, rm);

            /* If immediate second operand      */
            if ((ins & T ||
                 ((op == 0xF6 || op == 0xF7) && (reg < modregrm(0,2,0) || reg > modregrm(0,3,0)))
                ) &&
                c.IFL2 == FLconst)
            {
                int flags = c.Iflags & CFpsw;      /* if want result in flags */
                // u is the immediate, sign extended from the width in use
                targ_long u = c.IEV2.Vuns;
                if (ins & E)
                    u = cast(byte) u;
                else if (shortop)
                    u = cast(short) u;

                // Replace CMP reg,0 with TEST reg,reg
                if ((op & 0xFE) == 0x80 &&              // 80 is CMP R8,imm8; 81 is CMP reg,imm
                    rm >= modregrm(3,7,AX) &&
                    u == 0)
                {
                    c.Iop = (op & 1) | 0x84;
                    c.Irm = modregrm(3,ereg,ereg);
                    // the register now also appears in the reg field
                    if (c.Irex & REX_B)
                        c.Irex |= REX_R;
                    goto L1;
                }

                /* Optimize ANDs with an immediate constant             */
                if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0))
                {
                    if (rm >= modregrm(3,4,AX))         // AND reg,imm
                    {
                        if (u == 0)
                        {
                            /* Replace with XOR reg,reg     */
                            c.Iop = 0x30 | (op & 1);
                            c.Irm = modregrm(3,ereg,ereg);
                            if (c.Irex & REX_B)
                                c.Irex |= REX_R;
                            goto L1;
                        }
                        if (u == 0xFFFFFFFF && !flags)
                        {
                            // AND with all ones changes nothing
                            c.Iop = NOP;
                            goto L1;
                        }
                    }
                    if (op == 0x81 && !flags)
                    {   // If we can do the operation in one byte

                        // If EA is not SI or DI
                        if ((rm < modregrm(3,4,SP) || I64) &&
                            (config.flags4 & CFG4space ||
                             config.target_cpu < TARGET_PentiumPro)
                           )
                        {
                            if ((u & 0xFFFFFF00) == 0xFFFFFF00)
                            {
                                /* About to narrow to a byte AND. Register
                                 * operands SP..DI only get here when I64, and
                                 * their low bytes SPL,BPL,SIL,DIL need a REX
                                 * prefix (otherwise AH,CH,DH,BH are encoded).
                                 */
                                if (I64 && rm >= modregrm(3,4,SP))
                                    c.Irex |= REX;
                                goto L2;
                            }
                            else if (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4))
                            {
                                // only bits 8..15 are masked: operate on the
                                // second byte (or AH..BH) instead
                                if (!shortop)
                                {
                                    if ((u & 0xFFFF00FF) == 0xFFFF00FF)
                                        goto L3;
                                }
                                else
                                {
                                    if ((u & 0xFF) == 0xFF)
                                        goto L3;
                                }
                            }
                        }
                        if (!shortop && useopsize)
                        {
                            if ((u & 0xFFFF0000) == 0xFFFF0000)
                            {
                                // upper half untouched: do a 16 bit AND
                                c.Iflags ^= CFopsize;
                                goto L1;
                            }
                            if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX))
                            {
                                c.IEV1.Voffset += 2; /* address MSW      */
                                c.IEV2.Vuns >>= 16;
                                c.Iflags ^= CFopsize;
                                goto L1;
                            }
                            if (rm >= modregrm(3,4,AX))
                            {
                                if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64))
                                {
                                    c.Iop = 0x0FB6;     // MOVZX
                                    c.Irm = modregrm(3,ereg,ereg);
                                    if (c.Irex & REX_B)
                                        c.Irex |= REX_R;
                                    if (ereg & 4)       // SIL,DIL,BPL,SPL need REX prefix
                                        c.Irex |= REX;
                                    goto L1;
                                }
                                if (u == 0xFFFF)
                                {
                                    c.Iop = 0x0FB7;     // MOVZX
                                    c.Irm = modregrm(3,ereg,ereg);
                                    if (c.Irex & REX_B)
                                        c.Irex |= REX_R;
                                    goto L1;
                                }
                            }
                        }
                    }
                }

                /* Look for ADD,OR,SUB,XOR with u that we can eliminate */
                if (!flags &&
                    (op == 0x81 || op == 0x80) &&
                    (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) ||  // ADD,OR
                     reg == modregrm(0,5,0) || reg == modregrm(0,6,0))    // SUB, XOR
                   )
                {
                    if (u == 0)
                    {
                        c.Iop = NOP;
                        goto L1;
                    }
                    if (u == ~0 && reg == modregrm(0,6,0))  /* XOR  */
                    {
                        c.Iop = 0xF6 | (op & 1);       /* NOT  */
                        c.Irm ^= modregrm(0,6^2,0);    // reg field 6 becomes 2
                        goto L1;
                    }
                    if (!shortop &&
                        useopsize &&
                        op == 0x81 &&
                        (u & 0xFFFF0000) == 0 &&
                        (reg == modregrm(0,6,0) || reg == modregrm(0,1,0)))
                    {
                        c.Iflags ^= CFopsize;
                        goto L1;
                    }
                }

                /* Look for TEST or OR or XOR with an immediate constant */
                /* that we can replace with a byte operation            */
                if (op == 0xF7 && reg == modregrm(0,0,0) ||
                    op == 0x81 && reg == modregrm(0,6,0) && !flags ||
                    op == 0x81 && reg == modregrm(0,1,0))
                {
                    // See if we can replace a dword with a word
                    // (avoid for 32 bit instructions, because CFopsize
                    //  is too slow)
                    if (!shortop && useopsize)
                    {
                        if ((u & 0xFFFF0000) == 0)
                        {
                            c.Iflags ^= CFopsize;
                            goto L1;
                        }
                        /* If memory (not register) addressing mode     */
                        if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX))
                        {
                            c.IEV1.Voffset += 2; /* address MSW  */
                            c.IEV2.Vuns >>= 16;
                            c.Iflags ^= CFopsize;
                            goto L1;
                        }
                    }

                    // If EA is not SI or DI
                    if (rm < (modregrm(3,0,SP) | reg) &&
                        (usespace ||
                         config.target_cpu < TARGET_PentiumPro)
                       )
                    {
                        if ((u & 0xFFFFFF00) == 0)
                        {
                        L2: c.Iop--;           /* to byte instruction  */
                            c.Iflags &= ~CFopsize;
                            goto L1;
                        }
                        if (((u & 0xFFFF00FF) == 0 ||
                             (shortop && (u & 0xFF) == 0)) &&
                            (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4)))
                        {
                        L3:
                            // move the immediate's second byte down and
                            // address the operand's second byte
                            c.IEV2.Vuns >>= 8;
                            if (rm >= (modregrm(3,0,AX) | reg))
                                c.Irm |= 4;    /* AX.AH, BX.BH, etc. */
                            else
                                c.IEV1.Voffset += 1;
                            goto L2;
                        }
                    }

                    // BUG: which is right?
                    //else if ((u & 0xFFFF0000) == 0)

                    // NOTE: the leading 0 keeps this branch disabled
                    else if (0 && op == 0xF7 &&
                             rm >= modregrm(3,0,SP) &&
                             (u & 0xFFFF0000) == 0)

                        c.Iflags &= ~CFopsize;
                }

                // Try to replace TEST reg,-1 with TEST reg,reg
                if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8
                {
                    if ((u & 0xFF) == 0xFF)
                    {
                      L4:
                        c.Iop = 0x84;          // TEST regL,regL
                        c.Irm = modregrm(3,ereg,ereg);
                        if (c.Irex & REX_B)
                            c.Irex |= REX_R;
                        c.Iflags &= ~CFopsize;
                        goto L1;
                    }
                }
                if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4))
                {
                    if (u == 0xFF)
                    {
                        if (ereg & 4) // SIL,DIL,BPL,SPL need REX prefix
                            c.Irex |= REX;
                        goto L4;
                    }
                    if ((u & 0xFFFF) == 0xFF00 && shortop && !c.Irex && ereg < 4)
                    {
                        ereg |= 4;                /* to regH      */
                        goto L4;
                    }
                }

                /* Look for sign extended immediate data */
                if (cast(byte) u == u)
                {
                    if (op == 0x81)
                    {
                        // skip reg fields 1, 4 and 6 (OR, AND, XOR)
                        if (reg != 0x08 && reg != 0x20 && reg != 0x30)
                            c.Iop = op = 0x83;         /* 8 bit sgn ext */
                    }
                    else if (op == 0x69)                /* IMUL rw,ew,dw */
                        c.Iop = op = 0x6B;             /* IMUL rw,ew,db */
                }

                // Look for SHIFT EA,imm8 we can replace with short form
                if (u == 1 && ((op & 0xFE) == 0xC0))
                    c.Iop |= 0xD0;

            } /* if immediate second operand */

            /* Look for AX short form */
            if (ins & A)
            {
                if (rm == modregrm(0,AX,local_BPRM) &&
                    !(c.Irex & REX_R) &&               // and it's AX, not R8
                    (op & ~3) == 0x88 &&
                    !I64)
                {
                    op = ((op & 3) + 0xA0) ^ 2;
                    /* 8A. A0 */
                    /* 8B. A1 */
                    /* 88. A2 */
                    /* 89. A3 */
                    c.Iop = op;
                    // the address moves from operand 1 to operand 2
                    c.IFL2 = c.IFL1;
                    c.IEV2 = c.IEV1;
                }

                /* Replace MOV REG1,REG2 with MOV EREG1,EREG2   */
                else if (!I16 &&
                         (op == 0x89 || op == 0x8B) &&
                         (rm & 0xC0) == 0xC0 &&
                         (!b || b.BC != BCasm)
                        )
                    c.Iflags &= ~CFopsize;

                // If rm is AX
                else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c.Irex & (REX_R | REX_B)))
                {
                    switch (op)
                    {
                        case 0x80:  op = reg | 4; break;
                        case 0x81:  op = reg | 5; break;
                        case 0x87:  op = 0x90 + (reg>>3); break;    // XCHG

                        case 0xF6:
                            if (reg == 0)
                                op = 0xA8;  /* TEST AL,immed8       */
                            break;

                        case 0xF7:
                            if (reg == 0)
                                op = 0xA9;  /* TEST AX,immed16      */
                            break;

                        default:
                            break;
                    }
                    c.Iop = op;
                }
            }

            /* Look for reg short form */
            if ((ins & R) && (rm & 0xC0) == 0xC0)
            {
                switch (op)
                {
                    case 0xC6:  op = 0xB0 + ereg; break;
                    case 0xC7: // if no sign extension
                        if (!(c.Irex & REX_W && c.IEV2.Vint < 0))
                        {
                            c.Irm = 0;
                            c.Irex &= ~REX_W;
                            op = 0xB8 + ereg;
                        }
                        break;

                    case 0xFF:
                        switch (reg)
                        {   case 6<<3: op = 0x50+ereg; break;/* PUSH*/
                            case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/
                            case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/
                            default: break;
                        }
                        break;

                    case 0x8F:  op = 0x58 + ereg; break;
                    case 0x87:
                        if (reg == 0 && !(c.Irex & (REX_R | REX_B))) // Issue 12968: Needed to ensure it's referencing RAX, not R8
                            op = 0x90 + ereg;
                        break;

                    default:
                        break;
                }
                c.Iop = op;
            }

            // Look to remove redundant REX prefix on XOR
            if (c.Irex == REX_W // ignore ops involving R8..R15
                && (op == 0x31 || op == 0x33) // XOR
                && ((rm & 0xC0) == 0xC0) // register direct
                && ((reg >> 3) == ereg)) // register with itself
            {
                c.Irex = 0;
            }

            // Look to replace SHL reg,1 with ADD reg,reg
            if ((op & ~1) == 0xD0 &&
                     (rm & modregrm(3,7,0)) == modregrm(3,4,0) &&
                     config.target_cpu >= TARGET_80486)
            {
                c.Iop &= 1;
                c.Irm = cast(ubyte)((rm & modregrm(3,0,7)) | (ereg << 3));
                if (c.Irex & REX_B)
                    c.Irex |= REX_R;
                if (!(c.Iflags & CFpsw) && !I16)
                    c.Iflags &= ~CFopsize;
                goto L1;
            }

            /* Look for sign extended modregrm displacement, or 0
             * displacement.
             */

            if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp
                c.IFL1 == FLconst)      // and it's a constant
            {
                a = c.IEV1.Vpointer;
                if (a == 0 && (rm & 7) != local_BPRM &&         // if 0[disp]
                    !(local_BPRM == 5 && (rm & 7) == 4 && (c.Isib & 7) == BP)
                   )
                    c.Irm &= 0x3F;                     // drop the displacement
                else if (!I16)
                {
                    if (cast(targ_size_t)cast(targ_schar)a == a)
                        c.Irm ^= 0xC0;                 /* do 8 sx      */
                }
                else if ((cast(targ_size_t)cast(targ_schar)a & 0xFFFF) == (a & 0xFFFF))
                    c.Irm ^= 0xC0;                     /* do 8 sx      */
            }

            /* Look for LEA reg,[ireg], replace with MOV reg,ireg       */
            if (op == LEA)
            {
                // re-read Irm: the displacement rewrite above may have changed it
                rm = c.Irm & 7;
                mod = c.Irm & modregrm(3,0,0);
                if (mod == 0)
                {
                    if (!I16)
                    {
                        switch (rm)
                        {
                            case 4:
                            case 5:
                                break;

                            default:
                                c.Irm |= modregrm(3,0,0);
                                c.Iop = 0x8B;
                                break;
                        }
                    }
                    else
                    {
                        switch (rm)
                        {
                            case 4:     rm = modregrm(3,0,SI);  goto L6;
                            case 5:     rm = modregrm(3,0,DI);  goto L6;
                            case 7:     rm = modregrm(3,0,BX);  goto L6;
                            L6:     c.Irm = cast(ubyte)(rm + reg);
                                    c.Iop = 0x8B;
                                    break;

                            default:
                                    break;
                        }
                    }
                }

                /* replace LEA reg,0[BP] with MOV reg,BP        */
                else if (mod == modregrm(1,0,0) && rm == local_BPRM &&
                        c.IFL1 == FLconst && c.IEV1.Vpointer == 0)
                {
                    c.Iop = 0x8B;          /* MOV reg,BP   */
                    c.Irm = cast(ubyte)(modregrm(3,0,BP) + reg);
                }
            }

            // Replace [R13] with 0[R13]
            if (c.Irex & REX_B && ((c.Irm & modregrm(3,0,7)) == modregrm(0,0,BP) ||
                                    issib(c.Irm) && (c.Irm & modregrm(3,0,0)) == 0 && (c.Isib & 7) == BP))
            {
                c.Irm |= modregrm(1,0,0);
                c.IFL1 = FLconst;
                c.IEV1.Vpointer = 0;
            }
        }
        else if (!(c.Iflags & CFvex))
        {
            switch (op)
            {
                default:
                    // Look for MOV r64, immediate
                    if ((c.Irex & REX_W) && (op & ~7) == 0xB8)
                    {
                        /* Look for zero extended immediate data */
                        if (c.IEV2.Vsize_t == c.IEV2.Vuns)
                        {
                            c.Irex &= ~REX_W;
                        }
                        /* Look for sign extended immediate data */
                        else if (c.IEV2.Vsize_t == c.IEV2.Vint)
                        {
                            c.Irm = modregrm(3,0,op & 7);
                            c.Iop = op = 0xC7;
                            c.IEV2.Vsize_t = c.IEV2.Vuns;
                        }
                    }
                    // opcodes 0x70..0x7F are handled like JMP below
                    if ((op & ~0x0F) != 0x70)
                        break;
                    goto case JMP;

                case JMP:
                    switch (c.IFL2)
                    {
                        case FLcode:
                            // jump to the very next instruction
                            if (c.IEV2.Vcode == code_next(c))
                            {
                                c.Iop = NOP;
                                continue;
                            }
                            break;

                        case FLblock:
                            // last instruction jumping to the following block
                            if (!code_next(c) && c.IEV2.Vblock == bn)
                            {
                                c.Iop = NOP;
                                continue;
                            }
                            break;

                        case FLconst:
                        case FLfunc:
                        case FLextern:
                            break;

                        default:
                            WRFL(cast(FL)c.IFL2);
                            assert(0);
                    }
                    break;

                case 0x68:                      // PUSH immed16
                    if (c.IFL2 == FLconst)
                    {
                        targ_long u = c.IEV2.Vuns;
                        if (I64 ||
                            ((c.Iflags & CFopsize) ? I16 : I32))
                        {   // PUSH 32/64 bit operand
                            if (u == cast(byte) u)
                                c.Iop = 0x6A;          // PUSH immed8
                        }
                        else // PUSH 16 bit operand
                        {
                            if (cast(short)u == cast(byte) u)
                                c.Iop = 0x6A;          // PUSH immed8
                        }
                    }
                    break;
            }
        }
    }

    debug
    if (debugc)
    {
        printf("-pinholeopt(%p)\n",cstart);
        for (c = cstart; c; c = code_next(c))
            code_print(c);
    }
}
5940 
5941 
debug
{
/*******************************
 * Table-driven self test for pinholeopt().
 *
 * Each table row is a pair [ input, expected ]. A single instruction is
 * built from the input half, handed to pinholeopt(), and then compared
 * field by field against the expected half.
 *
 * Fields of a table entry:
 *      model = 0 to run under every memory model, else 16/32/64 to run the
 *              row only when the matching I16/I32/I64 is in effect
 *              (only the input half's model is consulted)
 *      op    = value for code.Iop
 *      ea    = value for code.Iea
 *      ev1   = value for code.IEV1.Vsize_t
 *      ev2   = value for code.IEV2.Vsize_t
 *      flags = value for code.Iflags
 */
private void pinholeopt_unittest()
{
    //printf("pinholeopt_unittest()\n");
    static struct PeepCase
    {
        uint model,op,ea;
        targ_size_t ev1,ev2;
        uint flags;
    }
    __gshared PeepCase[2][22] cases =
    [
        // XOR reg,immed                            NOT regL
        [ { 16,0x81,modregrm(3,6,BX),0,0xFF,0 },    { 0,0xF6,modregrm(3,2,BX),0,0xFF } ],

        // MOV 0[BX],3                               MOV [BX],3
        [ { 16,0xC7,modregrm(2,0,7),0,3 },          { 0,0xC7,modregrm(0,0,7),0,3 } ],

/+      // only if config.flags4 & CFG4space
        // TEST regL,immed8
        [ { 0,0xF6,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }],
        [ { 0,0xF7,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }],
        [ { 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }],
        [ { 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }],
+/

        // PUSH immed => PUSH immed8
        [ { 0,0x68,0,0,0 },    { 0,0x6A,0,0,0 }],
        [ { 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }],
        [ { 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }],
        [ { 16,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }],
        [ { 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }],
        [ { 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }],
        [ { 16,0x68,0,0,0x10000,0 },     { 0,0x6A,0,0,0x10000,0 }],
        [ { 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }],
        [ { 32,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }],
        [ { 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }],
        [ { 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }],
        [ { 32,0x68,0,0,0x10000,CFopsize },    { 0,0x6A,0,0,0x10000,CFopsize }],
        [ { 32,0x68,0,0,0x8000,CFopsize }, { 0,0x68,0,0,0x8000,CFopsize }],

        // clear r64, for r64 != R8..R15
        [ { 64,0x31,0x800C0,0,0,0 }, { 0,0x31,0xC0,0,0,0}],
        [ { 64,0x33,0x800C0,0,0,0 }, { 0,0x33,0xC0,0,0,0}],

        // MOV r64, immed
        [ { 64,0xC7,0x800C0,0,0xFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,0xFFFFFFFF,0}],
        [ { 64,0xC7,0x800C0,0,0x7FFFFFFF,0 }, { 0,0xB8,0,0,0x7FFFFFFF,0}],
        [ { 64,0xB8,0x80000,0,0xFFFFFFFF,0 }, { 0,0xB8,0,0,0xFFFFFFFF,0 }],
        [ { 64,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }, { 0,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }],
        [ { 64,0xB8,0x80000,0,cast(targ_size_t)0xFFFFFFFFFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,cast(targ_size_t)0xFFFFFFFF,0}],
    ];

    //config.flags4 |= CFG4space;
    foreach (int idx; 0 .. cast(int)cases.length)
    {
        PeepCase* input    = &cases[idx][0];
        PeepCase* expected = &cases[idx][1];

        // Skip rows that are tied to a memory model other than the current one
        const uint wanted = input.model;
        if (wanted &&
            ((I16 && wanted != 16) ||
             (I32 && wanted != 32) ||
             (I64 && wanted != 64)))
            continue;

        //printf("[%d]\n", idx);

        // Build a zeroed instruction and fill in the fields under test
        code insn = void;
        memset(&insn, 0, insn.sizeof);
        insn.Iop = input.op;
        insn.Iea = input.ea;
        insn.IFL1 = FLconst;
        insn.IFL2 = FLconst;
        insn.IEV1.Vsize_t = input.ev1;
        insn.IEV2.Vsize_t = input.ev2;
        insn.Iflags = input.flags;

        pinholeopt(&insn, null);

        // Report the failing row before aborting on an opcode mismatch
        if (insn.Iop != expected.op)
        {
            printf("[%d] Iop = x%02x, pout = x%02x\n", idx, insn.Iop, expected.op);
            assert(0);
        }
        assert(insn.Iea == expected.ea);
        assert(insn.IEV1.Vsize_t == expected.ev1);
        assert(insn.IEV2.Vsize_t == expected.ev2);
        assert(insn.Iflags == expected.flags);
    }
}
}
6031 
/**************************
 * If a register is already known to hold the constant used as the
 * immediate operand of an 0x80/0x81 instruction, rewrite the instruction
 * in place to use that register instead of the immediate.
 * Only done when CFG4optimized is set in config.flags4.
 * Params:
 *      c = instruction to examine and possibly rewrite
 */
void simplify_code(code* c)
{
    if (!(config.flags4 & CFG4optimized))
        return;
    if (c.Iop != 0x81 && c.Iop != 0x80)
        return;
    if (c.IFL2 != FLconst)
        return;

    // Byte-sized form (0x80) may only pick among the byte registers
    reg_t r;
    if (!reghasvalue((c.Iop == 0x80) ? BYTEREGS : ALLREGS,
                     I64 ? c.IEV2.Vsize_t : c.IEV2.Vlong,
                     &r))
        return;

    // Leave 16 bit code carrying an operand size prefix alone
    if (I16 && c.Iflags & CFopsize)
        return;

    // Register-operand opcode bases, indexed by the reg field of the
    // mod/reg/rm byte of the immediate instruction
    static immutable ubyte[8] regop =
            [ 0x00,0x08,0x10,0x18,0x20,0x28,0x30,0x38 ];

    //printf("replacing 0x%02x, val = x%lx\n",c.Iop,c.IEV2.Vlong);
    const subop = (c.Irm & modregrm(0,7,0)) >> 3;
    c.Iop = regop[subop] | (c.Iop & 1);     // keep the low (byte/word) opcode bit
    code_newreg(c, r);

    // Byte operation on register number 4 and up: set REX in 64 bit code
    if (I64 && !(c.Iop & 1) && (r & 4))
        c.Irex |= REX;
}
6053 
6054 /**************************
6055  * Compute jump addresses for FLcode.
6056  * Note: only works for forward referenced code.
6057  *       only direct jumps and branches are detected.
6058  *       LOOP instructions only work for backward refs.
6059  */
6060 
6061 void jmpaddr(code *c)
6062 {
6063     code* ci,cn,ctarg,cstart;
6064     targ_size_t ad;
6065 
6066     //printf("jmpaddr()\n");
6067     cstart = c;                           /* remember start of code       */
6068     while (c)
6069     {
6070         const op = c.Iop;
6071         if (op <= 0xEB &&
6072             inssize[op] & T &&   // if second operand
6073             c.IFL2 == FLcode &&
6074             ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ || op == CALL))
6075         {
6076             ci = code_next(c);
6077             ctarg = c.IEV2.Vcode;  /* target code                  */
6078             ad = 0;                 /* IP displacement              */
6079             while (ci && ci != ctarg)
6080             {
6081                 ad += calccodsize(ci);
6082                 ci = code_next(ci);
6083             }
6084             if (!ci)
6085                 goto Lbackjmp;      // couldn't find it
6086             if (!I16 || op == JMP || op == JMPS || op == JCXZ || op == CALL)
6087                 c.IEV2.Vpointer = ad;
6088             else                    /* else conditional             */
6089             {
6090                 if (!(c.Iflags & CFjmp16))     /* if branch    */
6091                     c.IEV2.Vpointer = ad;
6092                 else            /* branch around a long jump    */
6093                 {
6094                     cn = code_next(c);
6095                     c.next = code_calloc();
6096                     code_next(c).next = cn;
6097                     c.Iop = op ^ 1;        /* converse jmp */
6098                     c.Iflags &= ~CFjmp16;
6099                     c.IEV2.Vpointer = I16 ? 3 : 5;
6100                     cn = code_next(c);
6101                     cn.Iop = JMP;          /* long jump    */
6102                     cn.IFL2 = FLconst;
6103                     cn.IEV2.Vpointer = ad;
6104                 }
6105             }
6106             c.IFL2 = FLconst;
6107         }
6108         if (op == LOOP && c.IFL2 == FLcode)    /* backwards refs       */
6109         {
6110           Lbackjmp:
6111             ctarg = c.IEV2.Vcode;
6112             for (ci = cstart; ci != ctarg; ci = code_next(ci))
6113                 if (!ci || ci == c)
6114                     assert(0);
6115             ad = 2;                 /* - IP displacement            */
6116             while (ci != c)
6117             {
6118                 assert(ci);
6119                 ad += calccodsize(ci);
6120                 ci = code_next(ci);
6121             }
6122             c.IEV2.Vpointer = (-ad) & 0xFF;
6123             c.IFL2 = FLconst;
6124         }
6125         c = code_next(c);
6126     }
6127 }
6128 
6129 /*******************************
6130  * Calculate bl.Bsize.
6131  */
6132 
6133 uint calcblksize(code *c)
6134 {
6135     uint size;
6136     for (size = 0; c; c = code_next(c))
6137     {
6138         uint sz = calccodsize(c);
6139         //printf("off=%02x, sz = %d, code %p: op=%02x\n", size, sz, c, c.Iop);
6140         size += sz;
6141     }
6142     //printf("calcblksize(c = x%x) = %d\n", c, size);
6143     return size;
6144 }
6145 
6146 /*****************************
6147  * Calculate and return code size of a code.
6148  * Note that NOPs are sometimes used as markers, but are
6149  * never output. LINNUMs are never output.
6150  * Note: This routine must be fast. Profiling shows it is significant.
6151  */
6152 
6153 uint calccodsize(code *c)
6154 {
6155     uint size;
6156     ubyte rm,mod,ins;
6157     uint iflags;
6158     uint i32 = I32 || I64;
6159     uint a32 = i32;
6160 
6161     debug
6162     assert((a32 & ~1) == 0);
6163 
6164     iflags = c.Iflags;
6165     opcode_t op = c.Iop;
6166     if (iflags & CFvex && c.Ivex.pfx == 0xC4)
6167     {
6168         ins = vex_inssize(c);
6169         size = ins & 7;
6170         goto Lmodrm;
6171     }
6172     else if ((op & 0xFF00) == 0x0F00 || (op & 0xFFFD00) == 0x0F3800)
6173         op = 0x0F;
6174     else
6175         op &= 0xFF;
6176     switch (op)
6177     {
6178         case 0x0F:
6179             if ((c.Iop & 0xFFFD00) == 0x0F3800)
6180             {   // 3 byte op ( 0F38-- or 0F3A-- )
6181                 ins = inssize2[(c.Iop >> 8) & 0xFF];
6182                 size = ins & 7;
6183                 if (c.Iop & 0xFF000000)
6184                   size++;
6185             }
6186             else
6187             {   // 2 byte op ( 0F-- )
6188                 ins = inssize2[c.Iop & 0xFF];
6189                 size = ins & 7;
6190                 if (c.Iop & 0xFF0000)
6191                   size++;
6192             }
6193             break;
6194 
6195         case NOP:
6196         case ESCAPE:
6197             size = 0;                   // since these won't be output
6198             goto Lret2;
6199 
6200         case ASM:
6201             if (c.Iflags == CFaddrsize)        // kludge for DA inline asm
6202                 size = _tysize[TYnptr];
6203             else
6204                 size = cast(uint)c.IEV1.len;
6205             goto Lret2;
6206 
6207         case 0xA1:
6208         case 0xA3:
6209             if (c.Irex)
6210             {
6211                 size = 9;               // 64 bit immediate value for MOV to/from RAX
6212                 goto Lret;
6213             }
6214             goto Ldefault;
6215 
6216         case 0xF6:                      /* TEST mem8,immed8             */
6217             ins = inssize[op];
6218             size = ins & 7;
6219             if (i32)
6220                 size = inssize32[op];
6221             if ((c.Irm & (7<<3)) == 0)
6222                 size++;                 /* size of immed8               */
6223             break;
6224 
6225         case 0xF7:
6226             ins = inssize[op];
6227             size = ins & 7;
6228             if (i32)
6229                 size = inssize32[op];
6230             if ((c.Irm & (7<<3)) == 0)
6231                 size += (i32 ^ ((iflags & CFopsize) !=0)) ? 4 : 2;
6232             break;
6233 
6234         default:
6235         Ldefault:
6236             ins = inssize[op];
6237             size = ins & 7;
6238             if (i32)
6239                 size = inssize32[op];
6240     }
6241 
6242     if (iflags & (CFwait | CFopsize | CFaddrsize | CFSEG))
6243     {
6244         if (iflags & CFwait)    // if add FWAIT prefix
6245             size++;
6246         if (iflags & CFSEG)     // if segment override
6247             size++;
6248 
6249         // If the instruction has a second operand that is not an 8 bit,
6250         // and the operand size prefix is present, then fix the size computation
6251         // because the operand size will be different.
6252         // Walter, I had problems with this bit at the end.  There can still be
6253         // an ADDRSIZE prefix for these and it does indeed change the operand size.
6254 
6255         if (iflags & (CFopsize | CFaddrsize))
6256         {
6257             if ((ins & (T|E)) == T)
6258             {
6259                 if ((op & 0xAC) == 0xA0)
6260                 {
6261                     if (iflags & CFaddrsize && !I64)
6262                     {   if (I32)
6263                             size -= 2;
6264                         else
6265                             size += 2;
6266                     }
6267                 }
6268                 else if (iflags & CFopsize)
6269                 {   if (I16)
6270                         size += 2;
6271                     else
6272                         size -= 2;
6273                 }
6274             }
6275             if (iflags & CFaddrsize)
6276             {   if (!I64)
6277                     a32 ^= 1;
6278                 size++;
6279             }
6280             if (iflags & CFopsize)
6281                 size++;                         /* +1 for OPSIZE prefix         */
6282         }
6283     }
6284 
6285 Lmodrm:
6286     if ((op & ~0x0F) == 0x70)
6287     {
6288         if (iflags & CFjmp16)           // if long branch
6289             size += I16 ? 3 : 4;        // + 3(4) bytes for JMP
6290     }
6291     else if (ins & M)                   // if modregrm byte
6292     {
6293         rm = c.Irm;
6294         mod = rm & 0xC0;
6295         if (a32 || I64)
6296         {   // 32 bit addressing
6297             if (issib(rm))
6298                 size++;
6299             switch (mod)
6300             {   case 0:
6301                     if (issib(rm) && (c.Isib & 7) == 5 ||
6302                         (rm & 7) == 5)
6303                         size += 4;      /* disp32                       */
6304                     if (c.Irex & REX_B && (rm & 7) == 5)
6305                         /* Instead of selecting R13, this mode is an [RIP] relative
6306                          * address. Although valid, it's redundant, and should not
6307                          * be generated. Instead, generate 0[R13] instead of [R13].
6308                          */
6309                         assert(0);
6310                     break;
6311 
6312                 case 0x40:
6313                     size++;             /* disp8                        */
6314                     break;
6315 
6316                 case 0x80:
6317                     size += 4;          /* disp32                       */
6318                     break;
6319 
6320                 default:
6321                     break;
6322             }
6323         }
6324         else
6325         {   // 16 bit addressing
6326             if (mod == 0x40)            /* 01: 8 bit displacement       */
6327                 size++;
6328             else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6))
6329                 size += 2;
6330         }
6331     }
6332 
6333 Lret:
6334     if (!(iflags & CFvex) && c.Irex)
6335     {
6336         size++;
6337         if (c.Irex & REX_W && (op & ~7) == 0xB8)
6338             size += 4;
6339     }
6340 Lret2:
6341     //printf("op = x%02x, size = %d\n",op,size);
6342     return size;
6343 }
6344 
6345 /********************************
6346  * Return !=0 if codes match.
6347  */
6348 
6349 static if (0)
6350 {
6351 
6352 int code_match(code *c1,code *c2)
6353 {
6354     code cs1,cs2;
6355     ubyte ins;
6356 
6357     if (c1 == c2)
6358         goto match;
6359     cs1 = *c1;
6360     cs2 = *c2;
6361     if (cs1.Iop != cs2.Iop)
6362         goto nomatch;
6363     switch (cs1.Iop)
6364     {
6365         case ESCAPE | ESCctor:
6366         case ESCAPE | ESCdtor:
6367             goto nomatch;
6368 
6369         case NOP:
6370             goto match;
6371 
6372         case ASM:
6373             if (cs1.IEV1.len == cs2.IEV1.len &&
6374                 memcmp(cs1.IEV1.bytes,cs2.IEV1.bytes,cs1.EV1.len) == 0)
6375                 goto match;
6376             else
6377                 goto nomatch;
6378 
6379         default:
6380             if ((cs1.Iop & 0xFF) == ESCAPE)
6381                 goto match;
6382             break;
6383     }
6384     if (cs1.Iflags != cs2.Iflags)
6385         goto nomatch;
6386 
6387     ins = inssize[cs1.Iop & 0xFF];
6388     if ((cs1.Iop & 0xFFFD00) == 0x0F3800)
6389     {
6390         ins = inssize2[(cs1.Iop >> 8) & 0xFF];
6391     }
6392     else if ((cs1.Iop & 0xFF00) == 0x0F00)
6393     {
6394         ins = inssize2[cs1.Iop & 0xFF];
6395     }
6396 
6397     if (ins & M)                // if modregrm byte
6398     {
6399         if (cs1.Irm != cs2.Irm)
6400             goto nomatch;
6401         if ((cs1.Irm & 0xC0) == 0xC0)
6402             goto do2;
6403         if (is32bitaddr(I32,cs1.Iflags))
6404         {
6405             if (issib(cs1.Irm) && cs1.Isib != cs2.Isib)
6406                 goto nomatch;
6407             if (
6408                 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
6409                )
6410                 goto do2;       /* if no first operand  */
6411         }
6412         else
6413         {
6414             if (
6415                 ((rm & 0xC0) == 0 && !((rm & 7) == 6))
6416                )
6417                 goto do2;       /* if no first operand  */
6418         }
6419         if (cs1.IFL1 != cs2.IFL1)
6420             goto nomatch;
6421         if (flinsymtab[cs1.IFL1] && cs1.IEV1.Vsym != cs2.IEV1.Vsym)
6422             goto nomatch;
6423         if (cs1.IEV1.Voffset != cs2.IEV1.Voffset)
6424             goto nomatch;
6425     }
6426 
6427 do2:
6428     if (!(ins & T))                     // if no second operand
6429         goto match;
6430     if (cs1.IFL2 != cs2.IFL2)
6431         goto nomatch;
6432     if (flinsymtab[cs1.IFL2] && cs1.IEV2.Vsym != cs2.IEV2.Vsym)
6433         goto nomatch;
6434     if (cs1.IEV2.Voffset != cs2.IEV2.Voffset)
6435         goto nomatch;
6436 
6437 match:
6438     return 1;
6439 
6440 nomatch:
6441     return 0;
6442 }
6443 
6444 }
6445 
6446 /**************************
6447  * Write code to intermediate file.
6448  * Code starts at offset.
6449  * Returns:
6450  *      addr of end of code
6451  */
6452 
/**************************
 * Small fixed-size staging buffer for instruction bytes.
 * Bytes are accumulated in `bytes` and handed to objmod.bytes() for
 * segment `seg` at `offset` when flushed.
 */
private struct MiniCodeBuf
{
nothrow:
    size_t index;       // number of bytes currently staged in bytes[]
    size_t offset;      // offset in the segment where bytes[0] will be written
    int seg;            // segment the bytes are written to
    char[100] bytes; // = void;

    /// Start with an empty buffer positioned at the current Offset(seg).
    this(int seg)
    {
        index = 0;
        this.offset = cast(size_t)Offset(seg);
        this.seg = seg;
    }

    /// Write out the staged bytes unconditionally, advance `offset` by the
    /// value objmod.bytes() returns, and empty the buffer.
    void flushx()
    {
        // Emit accumulated bytes to code segment.
        // index == bytes.length is a completely full, and valid, buffer.
        debug assert(index <= bytes.length);
        offset += objmod.bytes(seg, offset, cast(uint)index, bytes.ptr);
        index = 0;
    }

    /// Append one byte. The caller must have ensured there is room.
    void gen(char c) { bytes[index++] = c; }

    /// Append n bytes starting at p. The caller must have ensured there is
    /// room: memcpy itself performs no bounds checking.
    void genp(size_t n, void *p)
    {
        debug assert(n <= bytes.length - index);
        memcpy(&bytes[index], p, n);
        index += n;
    }

    /// Write out the staged bytes, if there are any.
    void flush() { if (index) flushx(); }

    /// Segment offset that the next appended byte will end up at.
    uint getOffset() { return cast(uint)(offset + index); }

    /// Number of bytes that can still be appended before the buffer is full.
    uint available() { return cast(uint)(bytes.sizeof - index); }
}
6486 
// Prototypes for the operand emitters that codout() calls with its
// MiniCodeBuf, an operand's FL kind and a pointer to the operand value.
// do32bit's last parameter defaults to 0; codout() passes a bias there
// for RIP relative displacements.
// NOTE(review): bodies are not visible in this part of the file; presumably
// each one appends an operand of the width named -- confirm.
private void do8bit(MiniCodeBuf *pbuf, FL, evc *);
private void do16bit(MiniCodeBuf *pbuf, FL, evc *,int);
private void do32bit(MiniCodeBuf *pbuf, FL, evc *,int,int = 0);
private void do64bit(MiniCodeBuf *pbuf, FL, evc *,int);
6491 
6492 uint codout(int seg, code *c)
6493 {
6494     ubyte rm,mod;
6495     ubyte ins;
6496     code *cn;
6497     uint flags;
6498     Symbol *s;
6499 
6500     debug
6501     if (debugc) printf("codout(%p), Coffset = x%llx\n",c,cast(ulong)Offset(seg));
6502 
6503     MiniCodeBuf ggen = void;
6504     ggen.index = 0;
6505     ggen.offset = cast(size_t)Offset(seg);
6506     ggen.seg = seg;
6507 
6508     for (; c; c = code_next(c))
6509     {
6510         debug
6511         {
6512         if (debugc) { printf("off=%02u, sz=%u, ", ggen.getOffset(), calccodsize(c)); code_print(c); }
6513         uint startoffset = ggen.getOffset();
6514         }
6515 
6516         opcode_t op = c.Iop;
6517         ins = inssize[op & 0xFF];
6518         switch (op & 0xFF)
6519         {
6520             case ESCAPE:
6521                 /* Check for SSE4 opcode v/pmaxuw xmm1,xmm2/m128 */
6522                 if(op == 0x660F383E || c.Iflags & CFvex) break;
6523 
6524                 switch (op & 0xFFFF00)
6525                 {   case ESClinnum:
6526                         /* put out line number stuff    */
6527                         objmod.linnum(c.IEV1.Vsrcpos,seg,ggen.getOffset());
6528                         break;
6529 version (SCPP)
6530 {
6531 static if (1)
6532 {
6533                     case ESCctor:
6534                     case ESCdtor:
6535                     case ESCoffset:
6536                         if (config.exe != EX_WIN32)
6537                             except_pair_setoffset(c,ggen.getOffset() - funcoffset);
6538                         break;
6539 
6540                     case ESCmark:
6541                     case ESCrelease:
6542                     case ESCmark2:
6543                     case ESCrelease2:
6544                         break;
6545 }
6546 else
6547 {
6548                     case ESCctor:
6549                         except_push(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null);
6550                         break;
6551 
6552                     case ESCdtor:
6553                         except_pop(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null);
6554                         break;
6555 
6556                     case ESCmark:
6557                         except_mark();
6558                         break;
6559 
6560                     case ESCrelease:
6561                         except_release();
6562                         break;
6563 }
6564 }
6565                     case ESCadjesp:
6566                         //printf("adjust ESP %ld\n", (long)c.IEV1.Vint);
6567                         break;
6568 
6569                     default:
6570                         break;
6571                 }
6572 
6573                 debug
6574                 assert(calccodsize(c) == 0);
6575 
6576                 continue;
6577 
6578             case NOP:                   /* don't send them out          */
6579                 if (op != NOP)
6580                     break;
6581                 debug
6582                 assert(calccodsize(c) == 0);
6583 
6584                 continue;
6585 
6586             case ASM:
6587                 if (op != ASM)
6588                     break;
6589                 ggen.flush();
6590                 if (c.Iflags == CFaddrsize)    // kludge for DA inline asm
6591                 {
6592                     do32bit(&ggen, FLblockoff,&c.IEV1,0);
6593                 }
6594                 else
6595                 {
6596                     ggen.offset += objmod.bytes(seg,ggen.offset,cast(uint)c.IEV1.len,c.IEV1.bytes);
6597                 }
6598                 debug
6599                 assert(calccodsize(c) == c.IEV1.len);
6600 
6601                 continue;
6602 
6603             default:
6604                 break;
6605         }
6606         flags = c.Iflags;
6607 
6608         // See if we need to flush (don't have room for largest code sequence)
6609         if (ggen.available() < (1+4+4+8+8))
6610             ggen.flush();
6611 
6612         // see if we need to put out prefix bytes
6613         if (flags & (CFwait | CFPREFIX | CFjmp16))
6614         {
6615             int override_;
6616 
6617             if (flags & CFwait)
6618                 ggen.gen(0x9B);                      // FWAIT
6619                                                 /* ? SEGES : SEGSS      */
6620             switch (flags & CFSEG)
6621             {   case CFes:      override_ = SEGES;       goto segover;
6622                 case CFss:      override_ = SEGSS;       goto segover;
6623                 case CFcs:      override_ = SEGCS;       goto segover;
6624                 case CFds:      override_ = SEGDS;       goto segover;
6625                 case CFfs:      override_ = SEGFS;       goto segover;
6626                 case CFgs:      override_ = SEGGS;       goto segover;
6627                 segover:        ggen.gen(cast(ubyte)override_);
6628                                 break;
6629 
6630                 default:        break;
6631             }
6632 
6633             if (flags & CFaddrsize)
6634                 ggen.gen(0x67);
6635 
6636             // Do this last because of instructions like ADDPD
6637             if (flags & CFopsize)
6638                 ggen.gen(0x66);                      /* operand size         */
6639 
6640             if ((op & ~0x0F) == 0x70 && flags & CFjmp16) /* long condit jmp */
6641             {
6642                 if (!I16)
6643                 {   // Put out 16 bit conditional jump
6644                     c.Iop = op = 0x0F00 | (0x80 | (op & 0x0F));
6645                 }
6646                 else
6647                 {
6648                     cn = code_calloc();
6649                     /*cxcalloc++;*/
6650                     cn.next = code_next(c);
6651                     c.next= cn;          // link into code
6652                     cn.Iop = JMP;              // JMP block
6653                     cn.IFL2 = c.IFL2;
6654                     cn.IEV2.Vblock = c.IEV2.Vblock;
6655                     c.Iop = op ^= 1;           // toggle condition
6656                     c.IFL2 = FLconst;
6657                     c.IEV2.Vpointer = I16 ? 3 : 5; // skip over JMP block
6658                     c.Iflags &= ~CFjmp16;
6659                 }
6660             }
6661         }
6662 
6663         if (flags & CFvex)
6664         {
6665             if (flags & CFvex3)
6666             {
6667                 ggen.gen(0xC4);
6668                 ggen.gen(cast(ubyte)VEX3_B1(c.Ivex));
6669                 ggen.gen(cast(ubyte)VEX3_B2(c.Ivex));
6670                 ggen.gen(c.Ivex.op);
6671             }
6672             else
6673             {
6674                 ggen.gen(0xC5);
6675                 ggen.gen(cast(ubyte)VEX2_B1(c.Ivex));
6676                 ggen.gen(c.Ivex.op);
6677             }
6678             ins = vex_inssize(c);
6679             goto Lmodrm;
6680         }
6681 
6682         if (op > 0xFF)
6683         {
6684             if ((op & 0xFFFD00) == 0x0F3800)
6685                 ins = inssize2[(op >> 8) & 0xFF];
6686             else if ((op & 0xFF00) == 0x0F00)
6687                 ins = inssize2[op & 0xFF];
6688 
6689             if (op & 0xFF000000)
6690             {
6691                 ubyte op1 = op >> 24;
6692                 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
6693                 {
6694                     ggen.gen(op1);
6695                     if (c.Irex)
6696                         ggen.gen(c.Irex | REX);
6697                 }
6698                 else
6699                 {
6700                     if (c.Irex)
6701                         ggen.gen(c.Irex | REX);
6702                     ggen.gen(op1);
6703                 }
6704                 ggen.gen((op >> 16) & 0xFF);
6705                 ggen.gen((op >> 8) & 0xFF);
6706                 ggen.gen(op & 0xFF);
6707             }
6708             else if (op & 0xFF0000)
6709             {
6710                 ubyte op1 = cast(ubyte)(op >> 16);
6711                 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
6712                 {
6713                     ggen.gen(op1);
6714                     if (c.Irex)
6715                         ggen.gen(c.Irex | REX);
6716                 }
6717                 else
6718                 {
6719                     if (c.Irex)
6720                         ggen.gen(c.Irex | REX);
6721                     ggen.gen(op1);
6722                 }
6723                 ggen.gen((op >> 8) & 0xFF);
6724                 ggen.gen(op & 0xFF);
6725             }
6726             else
6727             {
6728                 if (c.Irex)
6729                     ggen.gen(c.Irex | REX);
6730                 ggen.gen((op >> 8) & 0xFF);
6731                 ggen.gen(op & 0xFF);
6732             }
6733         }
6734         else
6735         {
6736             if (c.Irex)
6737                 ggen.gen(c.Irex | REX);
6738             ggen.gen(cast(ubyte)op);
6739         }
6740   Lmodrm:
6741         if (ins & M)            /* if modregrm byte             */
6742         {
6743             rm = c.Irm;
6744             ggen.gen(rm);
6745 
6746             // Look for an address size override when working with the
6747             // MOD R/M and SIB bytes
6748 
6749             if (is32bitaddr( I32, flags))
6750             {
6751                 if (issib(rm))
6752                     ggen.gen(c.Isib);
6753                 switch (rm & 0xC0)
6754                 {
6755                     case 0x40:
6756                         do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1);     // 8 bit
6757                         break;
6758 
6759                     case 0:
6760                         if (!(issib(rm) && (c.Isib & 7) == 5 ||
6761                               (rm & 7) == 5))
6762                             break;
6763                         goto case 0x80;
6764 
6765                     case 0x80:
6766                     {
6767                         int cfflags = CFoff;
6768                         targ_size_t val = 0;
6769                         if (I64)
6770                         {
6771                             if ((rm & modregrm(3,0,7)) == modregrm(0,0,5))      // if disp32[RIP]
6772                             {
6773                                 cfflags |= CFpc32;
6774                                 val = -4;
6775                                 reg_t reg = rm & modregrm(0,7,0);
6776                                 if (ins & T ||
6777                                     ((op == 0xF6 || op == 0xF7) && (reg == modregrm(0,0,0) || reg == modregrm(0,1,0))))
6778                                 {   if (ins & E || op == 0xF6)
6779                                         val = -5;
6780                                     else if (c.Iflags & CFopsize)
6781                                         val = -6;
6782                                     else
6783                                         val = -8;
6784                                 }
6785 static if (TARGET_OSX || TARGET_WINDOS)
6786 {
6787                                 /* Mach-O and Win64 fixups already take the 4 byte size
6788                                  * into account, so bias by 4
6789         `                        */
6790                                 val += 4;
6791 }
6792                             }
6793                         }
6794                         do32bit(&ggen, cast(FL)c.IFL1,&c.IEV1,cfflags,cast(int)val);
6795                         break;
6796                     }
6797 
6798                     default:
6799                         break;
6800                 }
6801             }
6802             else
6803             {
6804                 switch (rm & 0xC0)
6805                 {   case 0x40:
6806                         do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1);     // 8 bit
6807                         break;
6808 
6809                     case 0:
6810                         if ((rm & 7) != 6)
6811                             break;
6812                         goto case 0x80;
6813 
6814                     case 0x80:
6815                         do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,CFoff);
6816                         break;
6817 
6818                     default:
6819                         break;
6820                 }
6821             }
6822         }
6823         else
6824         {
6825             if (op == 0xC8)
6826                 do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,0);
6827         }
6828         flags &= CFseg | CFoff | CFselfrel;
6829         if (ins & T)                    /* if second operand            */
6830         {
6831             if (ins & E)            /* if data-8                    */
6832                 do8bit(&ggen, cast(FL) c.IFL2,&c.IEV2);
6833             else if (!I16)
6834             {
6835                 switch (op)
6836                 {
6837                     case 0xC2:              /* RETN imm16           */
6838                     case 0xCA:              /* RETF imm16           */
6839                     do16:
6840                         do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
6841                         break;
6842 
6843                     case 0xA1:
6844                     case 0xA3:
6845                         if (I64 && c.Irex)
6846                         {
6847                     do64:
6848                             do64bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
6849                             break;
6850                         }
6851                         goto case 0xA0;
6852 
6853                     case 0xA0:              /* MOV AL,byte ptr []   */
6854                     case 0xA2:
6855                         if (c.Iflags & CFaddrsize && !I64)
6856                             goto do16;
6857                         else
6858                     do32:
6859                             do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
6860                         break;
6861 
6862                     case 0x9A:
6863                     case 0xEA:
6864                         if (c.Iflags & CFopsize)
6865                             goto ptr1616;
6866                         else
6867                             goto ptr1632;
6868 
6869                     case 0x68:              // PUSH immed32
6870                         if (cast(FL)c.IFL2 == FLblock)
6871                         {
6872                             c.IFL2 = FLblockoff;
6873                             goto do32;
6874                         }
6875                         else
6876                             goto case_default;
6877 
6878                     case CALL:              // CALL rel
6879                     case JMP:               // JMP  rel
6880                         flags |= CFselfrel;
6881                         goto case_default;
6882 
6883                     default:
6884                         if ((op|0xF) == 0x0F8F) // Jcc rel16 rel32
6885                             flags |= CFselfrel;
6886                         if (I64 && (op & ~7) == 0xB8 && c.Irex & REX_W)
6887                             goto do64;
6888                     case_default:
6889                         if (c.Iflags & CFopsize)
6890                             goto do16;
6891                         else
6892                             goto do32;
6893                 }
6894             }
6895             else
6896             {
6897                 switch (op)
6898                 {
6899                     case 0xC2:
6900                     case 0xCA:
6901                         goto do16;
6902 
6903                     case 0xA0:
6904                     case 0xA1:
6905                     case 0xA2:
6906                     case 0xA3:
6907                         if (c.Iflags & CFaddrsize)
6908                             goto do32;
6909                         else
6910                             goto do16;
6911 
6912                     case 0x9A:
6913                     case 0xEA:
6914                         if (c.Iflags & CFopsize)
6915                             goto ptr1632;
6916                         else
6917                             goto ptr1616;
6918 
6919                     ptr1616:
6920                     ptr1632:
6921                         //assert(c.IFL2 == FLfunc);
6922                         ggen.flush();
6923                         if (c.IFL2 == FLdatseg)
6924                         {
6925                             objmod.reftodatseg(seg,ggen.offset,c.IEV2.Vpointer,
6926                                     c.IEV2.Vseg,flags);
6927                             ggen.offset += 4;
6928                         }
6929                         else
6930                         {
6931                             s = c.IEV2.Vsym;
6932                             ggen.offset += objmod.reftoident(seg,ggen.offset,s,0,flags);
6933                         }
6934                         break;
6935 
6936                     case 0x68:              // PUSH immed16
6937                         if (cast(FL)c.IFL2 == FLblock)
6938                         {   c.IFL2 = FLblockoff;
6939                             goto do16;
6940                         }
6941                         else
6942                             goto case_default16;
6943 
6944                     case CALL:
6945                     case JMP:
6946                         flags |= CFselfrel;
6947                         goto default;
6948 
6949                     default:
6950                     case_default16:
6951                         if (c.Iflags & CFopsize)
6952                             goto do32;
6953                         else
6954                             goto do16;
6955                 }
6956             }
6957         }
6958         else if (op == 0xF6)            /* TEST mem8,immed8             */
6959         {
6960             if ((rm & (7<<3)) == 0)
6961                 do8bit(&ggen, cast(FL)c.IFL2,&c.IEV2);
6962         }
6963         else if (op == 0xF7)
6964         {
6965             if ((rm & (7<<3)) == 0)     /* TEST mem16/32,immed16/32     */
6966             {
6967                 if ((I32 || I64) ^ ((c.Iflags & CFopsize) != 0))
6968                     do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
6969                 else
6970                     do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
6971             }
6972         }
6973 
6974         debug
6975         if (ggen.getOffset() - startoffset != calccodsize(c))
6976         {
6977             printf("actual: %d, calc: %d\n", cast(int)(ggen.getOffset() - startoffset), cast(int)calccodsize(c));
6978             code_print(c);
6979             assert(0);
6980         }
6981     }
6982     ggen.flush();
6983     Offset(seg) = ggen.offset;
6984     //printf("-codout(), Coffset = x%x\n", Offset(seg));
6985     return cast(uint)ggen.offset;                      /* ending address               */
6986 }
6987 
6988 
/***********************************
 * Emit the 8 byte operand of an instruction.
 *
 * For FLconst, FLframehandler, FLblock (and FLgot on OSX targets) the value
 * is computed here and handed to `pbuf.genp()`. For every other operand kind
 * the buffer is flushed, a reference is registered with `objmod` at
 * `pbuf.offset`, and `pbuf.offset` is then advanced by 8.
 * Must only be called when generating 64 bit code (asserts `I64`).
 *
 * Params:
 *      pbuf  = code buffer being filled
 *      fl    = operand kind; selects which member of `uev` is used
 *      uev   = operand value
 *      flags = CFxxx flags forwarded to the object module
 */
private void do64bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags)
{
    Symbol *s;
    targ_size_t ad;

    assert(I64);
    switch (fl)
    {
        case FLconst:
            // the constant is read from the start of the operand union
            ad = *cast(targ_size_t *) uev;
        L1:
            // common exit for operands whose value is known right here
            pbuf.genp(8,&ad);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,CFoffset64 | flags);
            break;

        case FLframehandler:
            // remember where this operand lives, then emit 0 in its place
            framehandleroffset = pbuf.getOffset();
            ad = 0;
            goto L1;

        case FLswitch:
            pbuf.flush();
            ad = uev.Vswitch.Btableoffset;
            // the table is referenced in the code segment when CFGromable is
            // set, otherwise in the segment chosen by jmpTableSegment()
            if (config.flags & CFGromable)
                    objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
            else
                    objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff);
            break;

        case FLcsdata:
        case FLfardata:
            //symbol_print(uev.Vsym);
            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from a
            // un-named external with is the start of .rodata or .data
        case FLextern:                      /* external data symbol         */
        case FLtlsdata:
static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
{
        case FLgot:
        case FLgotoff:
}
            pbuf.flush();
            s = uev.Vsym;               /* symbol pointer               */
            objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags);
            break;

static if (TARGET_OSX)
{
        case FLgot:
            // record the operand position in the function symbol, emit 0
            funcsym_p.Slocalgotoffset = pbuf.getOffset();
            ad = 0;
            goto L1;
}

        case FLfunc:                        /* function call                */
            s = uev.Vsym;               /* symbol pointer               */
            assert(TARGET_SEGMENTED || !tyfarfunc(s.ty()));
            pbuf.flush();
            objmod.reftoident(pbuf.seg,pbuf.offset,s,0,CFoffset64 | flags);
            break;

        case FLblock:                       /* displacement to another block */
            // NOTE(review): the bias is 4 although 8 bytes are emitted; this
            // mirrors do32bit() - confirm whether this case is ever reached
            ad = uev.Vblock.Boffset - pbuf.getOffset() - 4;
            //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad);
            goto L1;

        case FLblockoff:
            pbuf.flush();
            assert(uev.Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset);
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            break;

        default:
            WRFL(fl);
            assert(0);
    }
    // account for the 8 bytes covered by the reference registered above
    pbuf.offset += 8;
}
7074 
7075 
/***********************************
 * Emit the 4 byte operand of an instruction.
 *
 * For operand kinds whose value is computed here (FLconst, FLframehandler,
 * FLblock, locally resolved self-relative FLfunc, and some target specific
 * FLswitch/FLgot forms) the value is handed to `pbuf.genp()`. For the others
 * the buffer is flushed, a reference is registered with `objmod` at
 * `pbuf.offset`, and `pbuf.offset` is then advanced.
 *
 * Params:
 *      pbuf  = code buffer being filled
 *      fl    = operand kind; selects which member of `uev` is used
 *      uev   = operand value
 *      flags = CFxxx flags forwarded to the object module
 *      val   = bias for symbol references: added to `uev.Voffset` for data
 *              symbol references (or, for Win64 CFpc32 references, encoded
 *              into the CFREL bits of `flags` instead), and passed as the
 *              offset for function references resolved by the object module
 */
private void do32bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags, int val)
{
    Symbol *s;
    targ_size_t ad;

    //printf("do32bit(flags = x%x)\n", flags);
    switch (fl)
    {
        case FLconst:
            assert(targ_size_t.sizeof == 4 || targ_size_t.sizeof == 8);
            // the constant is read from the start of the operand union
            ad = * cast(targ_size_t *) uev;
        L1:
            // common exit for operands whose value is known right here
            pbuf.genp(4,&ad);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags);
            break;

        case FLframehandler:
            // remember where this operand lives, then emit 0 in its place
            framehandleroffset = pbuf.getOffset();
            ad = 0;
            goto L1;

        case FLswitch:
            pbuf.flush();
            ad = uev.Vswitch.Btableoffset;
            if (config.flags & CFGromable)
            {
                static if (TARGET_OSX)
                {
                    // These are magic values based on the exact code generated for the switch jump
                    if (I64)
                        uev.Vswitch.Btablebase = pbuf.getOffset() + 4;
                    else
                        uev.Vswitch.Btablebase = pbuf.getOffset() + 4 - 8;
                    // emit the table offset relative to the recorded base
                    ad -= uev.Vswitch.Btablebase;
                    goto L1;
                }
                else static if (TARGET_WINDOS)
                {
                    if (I64)
                    {
                        // base is the address just past this 4 byte operand
                        uev.Vswitch.Btablebase = pbuf.getOffset() + 4;
                        ad -= uev.Vswitch.Btablebase;
                        goto L1;
                    }
                    else
                        objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
                }
                else
                {
                    objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
                }
            }
            else
                    objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff);
            break;

        case FLcode:
            //assert(JMPJMPTABLE);            // the only use case
            pbuf.flush();
            // operand value plus the current offset gives the code address
            ad = *cast(targ_size_t *) uev + pbuf.getOffset();
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
            break;

        case FLcsdata:
        case FLfardata:
            //symbol_print(uev.Vsym);

            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from a
            // un-named external with is the start of .rodata or .data
        case FLextern:                      /* external data symbol         */
        case FLtlsdata:
    static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
    {
        case FLgot:
        case FLgotoff:
    }
            pbuf.flush();
            s = uev.Vsym;               /* symbol pointer               */
            if (TARGET_WINDOS && I64 && (flags & CFpc32))
            {
                /* This is for those funky fixups where the location to be fixed up
                 * is a 'val' amount back from the current RIP, biased by adding 4.
                 */
                assert(val >= -5 && val <= 0);
                flags |= (-val & 7) << 24;          // set CFREL value
                assert(CFREL == (7 << 24));
                objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags);
            }
            else
                objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags);
            break;

    static if (TARGET_OSX)
    {
        case FLgot:
            // record the operand position in the function symbol, emit 0
            funcsym_p.Slocalgotoffset = pbuf.getOffset();
            ad = 0;
            goto L1;
    }

        case FLfunc:                        /* function call                */
            s = uev.Vsym;               /* symbol pointer               */
            if (tyfarfunc(s.ty()))
            {   /* Large code references are always absolute    */
                pbuf.flush();
                // together with the `+= 4` at the end of the function this
                // advances by exactly what reftoident() returned
                pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 4;
            }
            else if (s.Sseg == pbuf.seg &&
                     (s.Sclass == SCstatic || s.Sclass == SCglobal) &&
                     s.Sxtrnnum == 0 && flags & CFselfrel)
            {   /* if we know it's relative address     */
                ad = s.Soffset - pbuf.getOffset() - 4;
                goto L1;
            }
            else
            {
                assert(TARGET_SEGMENTED || !tyfarfunc(s.ty()));
                pbuf.flush();
                objmod.reftoident(pbuf.seg,pbuf.offset,s,val,flags);
            }
            break;

        case FLblock:                       /* displacement to another block */
            // relative to the end of this 4 byte operand
            ad = uev.Vblock.Boffset - pbuf.getOffset() - 4;
            //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad);
            goto L1;

        case FLblockoff:
            pbuf.flush();
            assert(uev.Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset);
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            break;

        default:
            WRFL(fl);
            assert(0);
    }
    // account for the 4 bytes covered by the reference registered above
    pbuf.offset += 4;
}
7222 
7223 
/***********************************
 * Emit the 2 byte operand of an instruction.
 *
 * FLconst operands, block displacements and locally resolved self-relative
 * function references are handed to `pbuf.genp()`. For the other operand
 * kinds the buffer is flushed, a reference is registered with `objmod` at
 * `pbuf.offset`, and `pbuf.offset` is then advanced.
 *
 * Params:
 *      pbuf  = code buffer being filled
 *      fl    = operand kind; selects which member of `uev` is used
 *      uev   = operand value
 *      flags = CFxxx flags forwarded to the object module
 */
private void do16bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags)
{
    Symbol *s;
    targ_size_t ad;

    switch (fl)
    {
        case FLconst:
            // the constant is taken from the start of the operand union
            pbuf.genp(2,cast(char *) uev);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags);
            break;

        case FLswitch:
            pbuf.flush();
            ad = uev.Vswitch.Btableoffset;
            // the table is referenced in the code segment when CFGromable is
            // set, otherwise in the segment chosen by jmpTableSegment()
            if (config.flags & CFGromable)
                objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
            else
                objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff);
            break;

        case FLcsdata:
        case FLfardata:
        case FLextern:                      /* external data symbol         */
        case FLtlsdata:
            //assert(SIXTEENBIT || TARGET_SEGMENTED);
            pbuf.flush();
            s = uev.Vsym;               /* symbol pointer               */
            objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags);
            break;

        case FLfunc:                        /* function call                */
            //assert(SIXTEENBIT || TARGET_SEGMENTED);
            s = uev.Vsym;               /* symbol pointer               */
            if (tyfarfunc(s.ty()))
            {   /* Large code references are always absolute    */
                pbuf.flush();
                // together with the `+= 2` at the end of the function this
                // advances by exactly what reftoident() returned
                pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 2;
            }
            else if (s.Sseg == pbuf.seg &&
                     (s.Sclass == SCstatic || s.Sclass == SCglobal) &&
                     s.Sxtrnnum == 0 && flags & CFselfrel)
            {   /* if we know it's relative address     */
                ad = s.Soffset - pbuf.getOffset() - 2;
                goto L1;
            }
            else
            {
                pbuf.flush();
                objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags);
            }
            break;

        case FLblock:                       /* displacement to another block */
            // relative to the end of this 2 byte operand
            ad = uev.Vblock.Boffset - pbuf.getOffset() - 2;
            debug
            {
                // the displacement must survive truncation to 16 bits
                targ_ptrdiff_t delta = uev.Vblock.Boffset - pbuf.getOffset() - 2;
                assert(cast(short)delta == delta);
            }
        L1:
            // shared with the self-relative FLfunc case above
            pbuf.genp(2,&ad);                    // displacement
            return;

        case FLblockoff:
            pbuf.flush();
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            break;

        default:
            WRFL(fl);
            assert(0);
    }
    // account for the 2 bytes covered by the reference registered above
    pbuf.offset += 2;
}
7304 
7305 
/***********************************
 * Emit the 1 byte operand of an instruction.
 *
 * Only two operand kinds are accepted: FLconst, whose `Vuns` value is
 * truncated to a byte, and FLblock, which becomes a displacement to the
 * target block measured from the byte following the operand. A block
 * displacement that does not fit in a signed byte is reported with printf()
 * followed by err_exit(). Any other kind trips assert(0).
 *
 * Params:
 *      pbuf = code buffer being filled
 *      fl   = operand kind (FLconst or FLblock)
 *      uev  = operand value
 */
private void do8bit(MiniCodeBuf *pbuf, FL fl, evc *uev)
{
    if (fl == FLconst)
    {
        pbuf.gen(cast(char)uev.Vuns);
        return;
    }

    if (fl != FLblock)
    {
        debug printf("fl = %d\n",fl);
        assert(0);
    }

    // distance from the end of this one byte operand to the target block
    const targ_ptrdiff_t disp = uev.Vblock.Boffset - pbuf.getOffset() - 1;
    const bool fitsInByte = cast(byte)disp == disp;
    if (!fitsInByte)
    {
        version (MARS)
        {
            // prefix the message with the source position when one is known
            if (uev.Vblock.Bsrcpos.Slinnum)
                printf("%s(%d): ", uev.Vblock.Bsrcpos.Sfilename, uev.Vblock.Bsrcpos.Slinnum);
        }
        printf("block displacement of %lld exceeds the maximum offset of -128 to 127.\n", cast(long)disp);
        err_exit();
    }

    const char dispByte = cast(char)disp;
    debug assert(uev.Vblock.Boffset > pbuf.getOffset() || dispByte != 0x7F);
    pbuf.gen(dispByte);
}
7339 
7340 
7341 /**********************************
7342  */
7343 
7344 version (SCPP)
7345 {
7346 static if (HYDRATE)
7347 {
/**********************************
 * Walk the code list reachable through `*pc` and hydrate every node along
 * with the pointers held in its operands.
 *
 * Each node is passed through ph_hydrate(); escape and ASM pseudo-ops then
 * hydrate their single payload, while ordinary instructions hydrate the
 * first operand (when the mod r/m byte calls for one) and the second
 * operand (when the instruction attributes have the T bit).
 *
 * Params:
 *      pc = address of the pointer to the first node; must not be null
 */
void code_hydrate(code **pc)
{
    assert(pc);
    for (code *c; *pc; pc = &c.next)
    {
        c = cast(code *) ph_hydrate(cast(void**)pc);

        // Fetch the attribute byte belonging to this opcode
        const op = c.Iop;
        ubyte ins;
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((op & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(op >> 8) & 0xFF];
        else if ((op & 0xFF00) == 0x0F00)
            ins = inssize2[op & 0xFF];
        else
            ins = inssize[op & 0xFF];

        // Pseudo-ops carry one payload in IEV1 and nothing else
        switch (op)
        {
            case ESCAPE | ESClinnum:
                srcpos_hydrate(&c.IEV1.Vsrcpos);
                continue;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_hydrate(&c.IEV1.Vtor);
                continue;

            case ASM:
                ph_hydrate(cast(void**)&c.IEV1.bytes);
                continue;

            default:
                break;
        }

        // Work out whether the mod r/m byte is followed by a first operand
        bool hasOperand1 = false;
        if (ins & M)
        {
            const ubyte rm = c.Irm;
            const uint modBits = rm & 0xC0;
            if (modBits != 0xC0)
            {
                const bool addr32 = is32bitaddr(I32,c.Iflags);
                if (modBits != 0)
                    hasOperand1 = true;
                else if (addr32)
                    hasOperand1 = (rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5;
                else
                    hasOperand1 = (rm & 7) == 6;
            }
        }

        if (hasOperand1)
        {
            const FL fl1 = cast(FL) c.IFL1;
            switch (fl1)
            {
                case FLudata:
                case FLdata:
                case FLreg:
                case FLauto:
                case FLfast:
                case FLbprel:
                case FLpara:
                case FLcsdata:
                case FLfardata:
                case FLtlsdata:
                case FLfunc:
                case FLpseudo:
                case FLextern:
                    // operand refers to a symbol
                    assert(flinsymtab[fl1]);
                    symbol_hydrate(&c.IEV1.Vsym);
                    symbol_debug(c.IEV1.Vsym);
                    break;

                case FLdatseg:
                case FLfltreg:
                case FLallocatmp:
                case FLcs:
                case FLndp:
                case FLoffset:
                case FLlocalsize:
                case FLconst:
                case FLframehandler:
                    // nothing to hydrate for these kinds
                    assert(!flinsymtab[fl1]);
                    break;

                case FLcode:
                    ph_hydrate(cast(void**)&c.IEV1.Vcode);
                    break;

                case FLblock:
                case FLblockoff:
                    ph_hydrate(cast(void**)&c.IEV1.Vblock);
                    break;
version (SCPP)
{
                case FLctor:
                case FLdtor:
                    el_hydrate(cast(elem**)&c.IEV1.Vtor);
                    break;
}
                case FLasm:
                    ph_hydrate(cast(void**)&c.IEV1.bytes);
                    break;

                default:
                    WRFL(fl1);
                    assert(0);
            }
        }

        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T))
            continue;           /* if no second operand */

        const FL fl2 = cast(FL) c.IFL2;
        switch (fl2)
        {
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                // operand refers to a symbol
                assert(flinsymtab[fl2]);
                symbol_hydrate(&c.IEV2.Vsym);
                symbol_debug(c.IEV2.Vsym);
                break;

            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                // nothing to hydrate for these kinds
                assert(!flinsymtab[fl2]);
                break;

            case FLcode:
                ph_hydrate(cast(void**)&c.IEV2.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_hydrate(cast(void**)&c.IEV2.Vblock);
                break;

            default:
                WRFL(fl2);
                assert(0);
        }
    }
}
7515 }
7516 
7517 /**********************************
7518  */
7519 
7520 static if (DEHYDRATE)
7521 {
/**********************************
 * Walk the code list reachable through `*pc` and dehydrate every node along
 * with the pointers held in its operands.
 *
 * The link that leads to each node is passed through ph_dehydrate(); escape
 * and ASM pseudo-ops then dehydrate their single payload, while ordinary
 * instructions dehydrate the first operand (when the mod r/m byte calls for
 * one) and the second operand (when the instruction attributes have the
 * T bit).
 *
 * Params:
 *      pc = address of the pointer to the first node
 */
void code_dehydrate(code **pc)
{
    for (code *c; (c = *pc) != null; pc = &code_next(c))
    {
        ph_dehydrate(pc);

        // Fetch the attribute byte belonging to this opcode
        const op = c.Iop;
        ubyte ins;
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((op & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(op >> 8) & 0xFF];
        else if ((op & 0xFF00) == 0x0F00)
            ins = inssize2[op & 0xFF];
        else
            ins = inssize[op & 0xFF];

        // Pseudo-ops carry one payload in IEV1 and nothing else
        switch (op)
        {
            case ESCAPE | ESClinnum:
                srcpos_dehydrate(&c.IEV1.Vsrcpos);
                continue;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_dehydrate(&c.IEV1.Vtor);
                continue;

            case ASM:
                ph_dehydrate(&c.IEV1.bytes);
                continue;

            default:
                break;
        }

        // Work out whether the mod r/m byte is followed by a first operand
        bool hasOperand1 = false;
        if (ins & M)
        {
            const ubyte rm = c.Irm;
            const uint modBits = rm & 0xC0;
            if (modBits != 0xC0)
            {
                const bool addr32 = is32bitaddr(I32,c.Iflags);
                if (modBits != 0)
                    hasOperand1 = true;
                else if (addr32)
                    hasOperand1 = (rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5;
                else
                    hasOperand1 = (rm & 7) == 6;
            }
        }

        if (hasOperand1)
        {
            const FL fl1 = cast(FL) c.IFL1;
            switch (fl1)
            {
                case FLudata:
                case FLdata:
                case FLreg:
                case FLauto:
                case FLfast:
                case FLbprel:
                case FLpara:
                case FLcsdata:
                case FLfardata:
                case FLtlsdata:
                case FLfunc:
                case FLpseudo:
                case FLextern:
                    // operand refers to a symbol
                    assert(flinsymtab[fl1]);
                    symbol_dehydrate(&c.IEV1.Vsym);
                    break;

                case FLdatseg:
                case FLfltreg:
                case FLallocatmp:
                case FLcs:
                case FLndp:
                case FLoffset:
                case FLlocalsize:
                case FLconst:
                case FLframehandler:
                    // nothing to dehydrate for these kinds
                    assert(!flinsymtab[fl1]);
                    break;

                case FLcode:
                    ph_dehydrate(&c.IEV1.Vcode);
                    break;

                case FLblock:
                case FLblockoff:
                    ph_dehydrate(&c.IEV1.Vblock);
                    break;
version (SCPP)
{
                case FLctor:
                case FLdtor:
                    el_dehydrate(&c.IEV1.Vtor);
                    break;
}
                case FLasm:
                    ph_dehydrate(&c.IEV1.bytes);
                    break;

                default:
                    WRFL(fl1);
                    assert(0);
            }
        }

        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T))
            continue;           /* if no second operand */

        const FL fl2 = cast(FL) c.IFL2;
        switch (fl2)
        {
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                // operand refers to a symbol
                assert(flinsymtab[fl2]);
                symbol_dehydrate(&c.IEV2.Vsym);
                break;

            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                // nothing to dehydrate for these kinds
                assert(!flinsymtab[fl2]);
                break;

            case FLcode:
                ph_dehydrate(&c.IEV2.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_dehydrate(&c.IEV2.Vblock);
                break;

            default:
                WRFL(fl2);
                assert(0);
        }
    }
}
7688 }
7689 }
7690 
/***************************
 * Debug aid: dump every node of a code list.
 *
 * Starting at `c`, each node is handed to code_print() and the walk
 * continues through code_next() until a null link is reached.
 * A null `c` prints nothing.
 */

void WRcodlst(code *c)
{
    code* node = c;
    while (node)
    {
        code_print(node);
        node = code_next(node);
    }
}
7700 
/***************************
 * Debug aid: print a one-line description of a single code node to stdout.
 *
 * The line contains, in order: the node address and its next pointer,
 * the VEX prefix bytes (if CFvex is set), the effective REX bits, the
 * opcode, the flags (if non-zero), the mod/rm and sib bytes together with
 * operand 1 (if the instruction-size entry has M set), and operand 2
 * (if the entry has T set).
 *
 * Params:
 *      c = code node to print; a null pointer prints "code 0"
 */
extern (C) void code_print(code* c)
{
    ubyte ins;          // attribute bits (M, T, ...) looked up for this opcode
    ubyte rexb;         // effective REX bits, taken from Irex or rebuilt from VEX

    if (c == null)
    {
        printf("code 0\n");
        return;
    }

    // Select the instruction-size table entry that matches the opcode encoding
    const op = c.Iop;
    if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
        ins = vex_inssize(c);
    else if ((c.Iop & 0xFFFD00) == 0x0F3800)
        ins = inssize2[(op >> 8) & 0xFF];
    else if ((c.Iop & 0xFF00) == 0x0F00)
        ins = inssize2[op & 0xFF];
    else
        ins = inssize[op & 0xFF];

    printf("code %p: nxt=%p ",c,code_next(c));

    if (c.Iflags & CFvex)
    {
        if (c.Iflags & CFvex3)
        {
            // 3 byte VEX prefix: rebuild the REX-equivalent bits from the
            // VEX fields (r, x and b are tested negated, w is used as is)
            printf("vex=0xC4");
            printf(" 0x%02X", VEX3_B1(c.Ivex));
            printf(" 0x%02X", VEX3_B2(c.Ivex));
            rexb =
                ( c.Ivex.w ? REX_W : 0) |
                (!c.Ivex.r ? REX_R : 0) |
                (!c.Ivex.x ? REX_X : 0) |
                (!c.Ivex.b ? REX_B : 0);
        }
        else
        {
            // 2 byte VEX prefix: only the (negated) r field contributes
            printf("vex=0xC5");
            printf(" 0x%02X", VEX2_B1(c.Ivex));
            rexb = !c.Ivex.r ? REX_R : 0;
        }
        printf(" ");
    }
    else
        rexb = c.Irex;

    if (rexb)
    {
        // Print the same value that is decoded into W/R/X/B below, so the
        // hex number and the letters always agree (also for VEX encodings,
        // where the bits come from the VEX fields rather than from Irex).
        printf("rex=0x%02X ", rexb);
        if (rexb & REX_W)
            printf("W");
        if (rexb & REX_R)
            printf("R");
        if (rexb & REX_X)
            printf("X");
        if (rexb & REX_B)
            printf("B");
        printf(" ");
    }
    printf("op=0x%02X",op);

    if ((op & 0xFF) == ESCAPE)
    {
        if ((op & 0xFF00) == ESClinnum)
        {
            // Line number pseudo-op: nothing else to show for this node
            printf(" linnum = %d\n",c.IEV1.Vsrcpos.Slinnum);
            return;
        }
        printf(" ESCAPE %d",c.Iop >> 8);
    }
    if (c.Iflags)
        printf(" flg=%x",c.Iflags);
    if (ins & M)
    {
        // mod/rm byte, printed raw and split into its 2,3,3 bit fields
        uint rm = c.Irm;
        printf(" rm=0x%02X=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7);
        if (!I16 && issib(rm))
        {
            ubyte sib = c.Isib;
            printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7);
        }
        // Operand 1 is only shown for these mod/rm forms
        if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40)
        {
            switch (c.IFL1)
            {
                case FLconst:
                case FLoffset:
                    printf(" int = %4d",c.IEV1.Vuns);
                    break;

                case FLblock:
                    printf(" block = %p",c.IEV1.Vblock);
                    break;

                case FLswitch:
                case FLblockoff:
                case FLlocalsize:
                case FLframehandler:
                case 0:
                    break;

                case FLdatseg:
                    printf(" %d.%llx",c.IEV1.Vseg,cast(ulong)c.IEV1.Vpointer);
                    break;

                case FLauto:
                case FLfast:
                case FLreg:
                case FLdata:
                case FLudata:
                case FLpara:
                case FLbprel:
                case FLtlsdata:
                    printf(" sym='%s'",c.IEV1.Vsym.Sident.ptr);
                    break;

                case FLextern:
                    printf(" FLextern offset = %4d",cast(int)c.IEV1.Voffset);
                    break;

                default:
                    // No detail available: just name the FL kind
                    WRFL(cast(FL)c.IFL1);
                    break;
            }
        }
    }
    if (ins & T)
    {
        // Operand 2: always name the FL kind first, then add detail if known
        printf(" ");
        WRFL(cast(FL)c.IFL2);
        switch (c.IFL2)
        {
            case FLconst:
                printf(" int = %4d",c.IEV2.Vuns);
                break;

            case FLblock:
                printf(" block = %p",c.IEV2.Vblock);
                break;

            case FLswitch:
            case FLblockoff:
            case 0:
            case FLlocalsize:
            case FLframehandler:
                break;

            case FLdatseg:
                printf(" %d.%llx",c.IEV2.Vseg,cast(ulong)c.IEV2.Vpointer);
                break;

            case FLauto:
            case FLfast:
            case FLreg:
            case FLpara:
            case FLbprel:
            case FLfunc:
            case FLdata:
            case FLudata:
            case FLtlsdata:
                printf(" sym='%s'",c.IEV2.Vsym.Sident.ptr);
                break;

            case FLcode:
                printf(" code = %p",c.IEV2.Vcode);
                break;

            default:
                // The FL kind was already printed before the switch;
                // printing it again here would only duplicate it.
                break;
        }
    }
    printf("\n");
}
7876 
7877 }