1 /**
2  * Compiler implementation of the
3  * $(LINK2 http://www.dlang.org, D programming language).
4  *
5  * Copyright:   Copyright (C) 1994-1998 by Symantec
6  *              Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved
7  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
8  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
9  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod3.d, backend/cod3.d)
10  * Documentation:  https://dlang.org/phobos/dmd_backend_cod3.html
11  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod3.d
12  */
13 
14 module dmd.backend.cod3;
15 
16 version (SCPP)
17     version = COMPILE;
18 version (MARS)
19     version = COMPILE;
20 
21 version (COMPILE)
22 {
23 
24 import core.stdc.stdio;
25 import core.stdc.stdlib;
26 import core.stdc.string;
27 
28 import dmd.backend.backend;
29 import dmd.backend.cc;
30 import dmd.backend.cdef;
31 import dmd.backend.cgcse;
32 import dmd.backend.code;
33 import dmd.backend.code_x86;
34 import dmd.backend.codebuilder;
35 import dmd.backend.dlist;
36 import dmd.backend.dvec;
37 import dmd.backend.melf;
38 import dmd.backend.mem;
39 import dmd.backend.el;
40 import dmd.backend.exh;
41 import dmd.backend.global;
42 import dmd.backend.obj;
43 import dmd.backend.oper;
44 import dmd.backend.outbuf;
45 import dmd.backend.rtlsym;
46 import dmd.backend.symtab;
47 import dmd.backend.ty;
48 import dmd.backend.type;
49 import dmd.backend.xmm;
50 
51 version (SCPP)
52 {
53     import parser;
54     import precomp;
55 }
56 
57 extern (C++):
58 
59 nothrow:
60 
// Mirror the MARS version identifier as a compile-time boolean so it can be
// used inside ordinary expressions.
version (MARS)
    enum MARS = true;
else
    enum MARS = false;

// Declared here without a body; the definition lives outside this module.
int REGSIZE();

// Code generator globals defined outside this module.
extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl, flinsymtab;
71 
/// Returns: `1` shifted left by `m` positions, i.e. a value with only bit `m` set.
private extern (D) uint mask(uint m)
{
    return 1 << m;
}
73 
74 //private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); }
75 
// Defined outside this module.
extern __gshared targ_size_t retsize;

enum JMPJMPTABLE = false;               // benchmarking shows it's slower

// Bit patterns of the smallest and largest 64-bit two's-complement values.
enum MINLL =           0x8000_0000_0000_0000L;
enum MAXLL =           0x7FFF_FFFF_FFFF_FFFFL;
82 
83 /*************
84  * Size in bytes of each instruction.
85  * 0 means illegal instruction.
86  * bit  M:      if there is a modregrm field (EV1 is reserved for modregrm)
87  * bit  T:      if there is a second operand (EV2)
88  * bit  E:      if second operand is only 8 bits
89  * bit  A:      a short version exists for the AX reg
90  * bit  R:      a short version exists for regs
91  * bits 2..0:   size of instruction (excluding optional bytes)
92  */
93 
// Flag bits OR'ed into the entries of the instruction size tables.
enum
{
    M = 1 << 7,     // instruction has a modregrm field (EV1 is reserved for it)
    T = 1 << 6,     // instruction has a second operand (EV2)
    E = 1 << 5,     // second operand is only 8 bits
    A = 1 << 4,     // a short version exists for the AX reg
    R = 1 << 3,     // a short version exists for regs
    W = 0,          // contributes no bits to an entry
}
103 
/* Size/flag table for one-byte opcodes, indexed by the opcode byte.
 * Each entry is the M/T/E/A/R flag bits OR'ed with the instruction size;
 * 0 marks an illegal instruction.
 * Not const: cod3_set32() and cod3_set64() overwrite entries 0xA0..0xA3.
 */
private __gshared ubyte[256] inssize =
[       M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 00 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 08 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 10 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 18 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 20 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 28 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 30 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 38 */
        1,1,1,1,                1,1,1,1,                /* 40 */
        1,1,1,1,                1,1,1,1,                /* 48 */
        1,1,1,1,                1,1,1,1,                /* 50 */
        1,1,1,1,                1,1,1,1,                /* 58 */
        1,1,M|2,M|2,            1,1,1,1,                /* 60 */
        T|3,M|T|4,T|E|2,M|T|E|3, 1,1,1,1,               /* 68 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* 70 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* 78 */
        M|T|E|A|3,M|T|A|4,M|T|E|3,M|T|E|3,      M|2,M|2,M|2,M|A|R|2, /* 80 */
        M|A|2,M|A|2,M|A|2,M|A|2,        M|2,M|2,M|2,M|R|2,      /* 88 */
        1,1,1,1,                1,1,1,1,                /* 90 */
        1,1,T|5,1,              1,1,1,1,                /* 98 */

     // cod3_set32() and cod3_set64() patch these four entries to T|5
    //  T|5,T|5,T|5,T|5,        1,1,1,1,                /* A0 */
        T|3,T|3,T|3,T|3,        1,1,1,1,                /* A0 */

        T|E|2,T|3,1,1,          1,1,1,1,                /* A8 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* B0 */
        T|3,T|3,T|3,T|3,        T|3,T|3,T|3,T|3,                /* B8 */
        M|T|E|3,M|T|E|3,T|3,1,  M|2,M|2,M|T|E|R|3,M|T|R|4,      /* C0 */
        T|E|4,1,T|3,1,          1,T|E|2,1,1,            /* C8 */
        M|2,M|2,M|2,M|2,        T|E|2,T|E|2,0,1,        /* D0 */
        /* For the floating instructions, allow room for the FWAIT      */
        M|2,M|2,M|2,M|2,        M|2,M|2,M|2,M|2,        /* D8 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2,       /* E0 */
        T|3,T|3,T|5,T|E|2,              1,1,1,1,                /* E8 */
        1,0,1,1,                1,1,M|A|2,M|A|2,                /* F0 */
        1,1,1,1,                1,1,M|2,M|R|2                   /* F8 */
];
143 
/* Plain byte counts per one-byte opcode, indexed by the opcode byte.
 * Unlike inssize, the entries carry no M/T/E/A/R flag bits; 0 marks an
 * illegal instruction.
 * NOTE(review): presumably the sizes that apply with 32-bit operands and
 * addresses -- confirm against the code that reads this table.
 */
private __gshared const ubyte[256] inssize32 =
[       2,2,2,2,        2,5,1,1,                /* 00 */
        2,2,2,2,        2,5,1,1,                /* 08 */
        2,2,2,2,        2,5,1,1,                /* 10 */
        2,2,2,2,        2,5,1,1,                /* 18 */
        2,2,2,2,        2,5,1,1,                /* 20 */
        2,2,2,2,        2,5,1,1,                /* 28 */
        2,2,2,2,        2,5,1,1,                /* 30 */
        2,2,2,2,        2,5,1,1,                /* 38 */
        1,1,1,1,        1,1,1,1,                /* 40 */
        1,1,1,1,        1,1,1,1,                /* 48 */
        1,1,1,1,        1,1,1,1,                /* 50 */
        1,1,1,1,        1,1,1,1,                /* 58 */
        1,1,2,2,        1,1,1,1,                /* 60 */
        5,6,2,3,        1,1,1,1,                /* 68 */
        2,2,2,2,        2,2,2,2,                /* 70 */
        2,2,2,2,        2,2,2,2,                /* 78 */
        3,6,3,3,        2,2,2,2,                /* 80 */
        2,2,2,2,        2,2,2,2,                /* 88 */
        1,1,1,1,        1,1,1,1,                /* 90 */
        1,1,7,1,        1,1,1,1,                /* 98 */
        5,5,5,5,        1,1,1,1,                /* A0 */
        2,5,1,1,        1,1,1,1,                /* A8 */
        2,2,2,2,        2,2,2,2,                /* B0 */
        5,5,5,5,        5,5,5,5,                /* B8 */
        3,3,3,1,        2,2,3,6,                /* C0 */
        4,1,3,1,        1,2,1,1,                /* C8 */
        2,2,2,2,        2,2,0,1,                /* D0 */
        /* For the floating instructions, don't need room for the FWAIT */
        2,2,2,2,        2,2,2,2,                /* D8 */

        2,2,2,2,        2,2,2,2,                /* E0 */
        5,5,7,2,        1,1,1,1,                /* E8 */
        1,0,1,1,        1,1,2,2,                /* F0 */
        1,1,1,1,        1,1,2,2                 /* F8 */
];
180 
/* Size/flag table for 2 byte opcodes starting with 0x0F, indexed by the
 * byte that follows the 0x0F. Entries use the same M/T/E flag bits as
 * inssize.
 * Not const: cod3_set32() and cod3_set64() overwrite entries 0x80..0x8F
 * with W|T|6.
 */
private __gshared ubyte[256] inssize2 =
[       M|3,M|3,M|3,M|3,        2,2,2,2,                // 00
        2,2,M|3,2,              2,M|3,2,M|T|E|4,        // 08
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 10
        M|3,2,2,2,              2,2,2,2,                // 18
        M|3,M|3,M|3,M|3,        M|3,2,M|3,2,            // 20
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 28
        2,2,2,2,                2,2,2,2,                // 30
        M|4,2,M|T|E|5,2,        2,2,2,2,                // 38
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 40
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 48
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 50
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 58
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 60
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 68
        M|T|E|4,M|T|E|4,M|T|E|4,M|T|E|4, M|3,M|3,M|3,2, // 70
        2,2,2,2,                M|3,M|3,M|3,M|3,        // 78
        W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 80
        W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 88
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // 90
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // 98
        2,2,2,M|3,      M|T|E|4,M|3,2,2,        // A0
        2,2,2,M|3,      M|T|E|4,M|3,M|3,M|3,    // A8
        M|E|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,     // B0
        M|3,2,M|T|E|4,M|3, M|3,M|3,M|3,M|3,     // B8
        M|3,M|3,M|T|E|4,M|3, M|T|E|4,M|T|E|4,M|T|E|4,M|3,       // C0
        2,2,2,2,        2,2,2,2,                // C8
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // D0
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // D8
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // E0
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // E8
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3,       // F0
        M|3,M|3,M|3,M|3, M|3,M|3,M|3,2          // F8
];
216 
/*************************************************
 * Emit a store of `reg` into the next free slot of the `regsave` stack area.
 *
 * An XMM register gets a 16 byte slot at a 16 byte aligned index and sets
 * `regsave.alignment` to 16; any other register gets a REGSIZE byte slot.
 * Also sets `reflocal` and keeps `regsave.top` at the high water mark.
 * Params:
 *      regsave = register save area on stack; its idx, top and alignment are updated
 *      cdb = where to write generated code
 *      reg = register to save
 *      idx = set to location in regsave for use in REGSAVE_restore()
 */

void REGSAVE_save(ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, out uint idx)
{
    if (!isXMMreg(reg))
    {
        // First slot handed out decides the minimum alignment
        if (regsave.alignment == 0)
            regsave.alignment = REGSIZE;

        idx = regsave.idx;
        regsave.idx += REGSIZE;

        // MOV idx[RBP],reg
        cdb.genc1(0x89, modregxrm(2, reg, BPRM), FLregsave, cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
    else
    {
        regsave.alignment = 16;

        // Round the next free index up to a multiple of 16, then claim 16 bytes
        const uint slot = (regsave.idx + 15) & ~15;
        idx = slot;
        regsave.idx = slot + 16;

        // Aligned store of xmm to idx[RBP], except on 32 bit Linux:
        // haven't yet figured out why stack is not aligned to 16 there
        const opcode_t store = (TARGET_LINUX && I32) ? STOUPD : STOAPD;
        cdb.genc1(store, modregxrm(2, reg - XMM0, BPRM), FLregsave, cast(targ_uns) idx);
    }

    reflocal = true;

    // keep high water mark
    if (regsave.top < regsave.idx)
        regsave.top = regsave.idx;
}
256 
/*******************************
 * Emit a load of `reg` from slot `idx` of the `regsave` area.
 * Complement of REGSAVE_save().
 * Params:
 *      regsave = register save area; only read (its alignment must be 16 for XMM registers)
 *      cdb = where to write generated code
 *      reg = register to restore
 *      idx = slot location as returned by REGSAVE_save()
 */

void REGSAVE_restore(const ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, uint idx)
{
    if (!isXMMreg(reg))
    {
        // MOV reg,idx[RBP]
        cdb.genc1(0x8B, modregxrm(2, reg, BPRM), FLregsave, cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
        return;
    }

    assert(regsave.alignment == 16);

    // Aligned load of xmm from idx[RBP], except on 32 bit Linux:
    // haven't yet figured out why stack is not aligned to 16 there
    const opcode_t load = (TARGET_LINUX && I32) ? LODUPD : LODAPD;
    cdb.genc1(load, modregxrm(2, reg - XMM0, BPRM), FLregsave, cast(targ_uns) idx);
}
281 
/************************************
 * Size for vex encoded instruction.
 *
 * The result is derived from an inssize2[] entry with a constant added,
 * so it still carries that entry's flag bits.
 * Params:
 *      c = instruction; must have CFvex set and a 0xC4 prefix byte
 * Returns:
 *      adjusted inssize2[] entry
 */

ubyte vex_inssize(code *c)
{
    assert(c.Iflags & CFvex && c.Ivex.pfx == 0xC4);

    // Not the 3 byte form of the prefix
    if (!(c.Iflags & CFvex3))
        return cast(ubyte)(inssize2[c.Ivex.op] + 1);

    switch (c.Ivex.mmmm)
    {
        case 0: // no prefix
        case 1: // 0F
            return cast(ubyte)(inssize2[c.Ivex.op] + 2);

        case 2: // 0F 38
            return cast(ubyte)(inssize2[0x38] + 1);

        case 3: // 0F 3A
            return cast(ubyte)(inssize2[0x3A] + 1);

        default:
            printf("Iop = %x mmmm = %x\n", c.Iop, c.Ivex.mmmm);
            assert(0);
    }
}
315 
/************************************
 * Determine if there is a modregrm byte for code.
 * Params:
 *      c = instruction to examine
 * Returns:
 *      M if the size table entry for the opcode has the M bit set, 0 otherwise
 *      (always 0 for ESCAPE pseudo instructions)
 */

int cod3_EA(code *c)
{
    const opcode_t lowByte = c.Iop & 0xFF;

    if (lowByte == ESCAPE)
        return 0;

    ubyte entry;
    if ((c.Iop & 0xFFFD00) == 0x0F3800)         // 0F 38 xx or 0F 3A xx
        entry = inssize2[(c.Iop >> 8) & 0xFF];
    else if ((c.Iop & 0xFF00) == 0x0F00)        // 0F xx
        entry = inssize2[lowByte];
    else
        entry = inssize[lowByte];

    return entry & M;
}
334 
/********************************
 * Set up ALLREGS and BYTEREGS for the current target.
 * For I64 every register in ALLREGS is also in BYTEREGS; otherwise the
 * ALLREGS_INIT / BYTEREGS_INIT defaults are used.
 * called by: codgen
 */

void cod3_initregs()
{
    if (!I64)
    {
        ALLREGS = ALLREGS_INIT;
        BYTEREGS = BYTEREGS_INIT;
        return;
    }

    ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
    BYTEREGS = ALLREGS;
}
353 
/********************************
 * Set initial global variable values:
 * fregsaved starts out as BP, SI and DI.
 */

void cod3_setdefault()
{
    fregsaved = mDI | mSI | mBP;
}
362 
/********************************
 * Fix global variables for 386:
 * patches the instruction size tables and sets the addressing mode,
 * saved-register set, float register sets and stack alignment.
 */

void cod3_set32()
{
    // Opcodes A0..A3 become 5 bytes
    foreach (op; 0xA0 .. 0xA4)
        inssize[op] = T|5;

    // Two byte opcodes 0F 80..0F 8F become 6 bytes
    foreach (op; 0x80 .. 0x90)
        inssize2[op] = W|T|6;

    BPRM = 5;                       /* [EBP] addressing mode        */

    FLOATREGS = FLOATREGS_32;
    FLOATREGS2 = FLOATREGS2_32;
    DOUBLEREGS = DOUBLEREGS_32;

    // saved across function calls
    fregsaved = mBP | mBX | mSI | mDI;
    if (config.flags3 & CFG3eseqds)
        fregsaved |= mES;

    TARGET_STACKALIGN = config.fpxmmregs ? 16 : 4;
}
386 
/********************************
 * Fix global variables for I64:
 * patches the instruction size tables and sets the addressing mode,
 * saved-register set, float register sets, allocatable registers and
 * stack alignment.
 */

void cod3_set64()
{
    // A0: MOV AL,mem   A1: MOV RAX,mem   A2: MOV mem,AL   A3: MOV mem,RAX
    foreach (op; 0xA0 .. 0xA4)
        inssize[op] = T|5;

    // Two byte opcodes 0F 80..0F 8F become 6 bytes
    foreach (op; 0x80 .. 0x90)
        inssize2[op] = W|T|6;

    BPRM = 5;                           // [RBP] addressing mode

    // Registers saved across function calls
    if (config.exe & EX_windos)
    {
        // NOTE(review): the original comment here reads "also XMM8..15",
        // but only XMM6 and XMM7 are in the mask
        fregsaved = mBP | mBX | mDI | mSI | mR12 | mR13 | mR14 | mR15 | mES | mXMM6 | mXMM7;
    }
    else
    {
        fregsaved = mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES;
    }

    FLOATREGS = FLOATREGS_64;
    FLOATREGS2 = FLOATREGS2_64;
    DOUBLEREGS = DOUBLEREGS_64;

    ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI|  mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
    BYTEREGS = ALLREGS;

    TARGET_STACKALIGN = config.fpxmmregs ? 16 : 8;
}
415 
/*********************************
 * Write `nbytes` of NOP padding to a segment.
 *
 * For I64 or when config.fpxmmregs is set, multi-byte NOPs of up to
 * 9 bytes are written; otherwise runs of single byte NOPs, at most 15 per
 * write.
 * Params:
 *      seg = segment to write alignment bytes to
 *      nbytes = number of alignment bytes to write
 */
void cod3_align_bytes(int seg, size_t nbytes)
{
    /* Table 4-2 from Intel Instruction Set Reference M-Z
     * 1 bytes NOP                                        90
     * 2 bytes 66 NOP                                     66 90
     * 3 bytes NOP DWORD ptr [EAX]                        0F 1F 00
     * 4 bytes NOP DWORD ptr [EAX + 00H]                  0F 1F 40 00
     * 5 bytes NOP DWORD ptr [EAX + EAX*1 + 00H]          0F 1F 44 00 00
     * 6 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00H]       66 0F 1F 44 00 00
     * 7 bytes NOP DWORD ptr [EAX + 00000000H]            0F 1F 80 00 00 00 00
     * 8 bytes NOP DWORD ptr [EAX + EAX*1 + 00000000H]    0F 1F 84 00 00 00 00 00
     * 9 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] 66 0F 1F 84 00 00 00 00 00
     * only for CPUs: CPUID.01H.EAX[Bytes 11:8] = 0110B or 1111B
     */

    assert(SegData[seg].SDseg == seg);

    // XCHG AX,AX
    static immutable ubyte[15] singleNops = [
        0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
    ];

    for (size_t remaining = nbytes; remaining != 0; )
    {
        size_t chunk = remaining;       // bytes written by this iteration
        const(char)* bytes;

        if (remaining > 1 && (I64 || config.fpxmmregs))
        {
            // Longest available encoding is 9 bytes
            if (chunk > 9)
                chunk = 9;

            switch (chunk)
            {
                case 2:  bytes = "\x66\x90"; break;
                case 3:  bytes = "\x0F\x1F\x00"; break;
                case 4:  bytes = "\x0F\x1F\x40\x00"; break;
                case 5:  bytes = "\x0F\x1F\x44\x00\x00"; break;
                case 6:  bytes = "\x66\x0F\x1F\x44\x00\x00"; break;
                case 7:  bytes = "\x0F\x1F\x80\x00\x00\x00\x00"; break;
                case 8:  bytes = "\x0F\x1F\x84\x00\x00\x00\x00\x00"; break;
                default: bytes = "\x66\x0F\x1F\x84\x00\x00\x00\x00\x00"; break;
            }
        }
        else
        {
            if (chunk > singleNops.length)
                chunk = singleNops.length;
            bytes = cast(const(char)*) singleNops.ptr;
        }

        objmod.write_bytes(SegData[seg], cast(uint) chunk, cast(char*) bytes);
        remaining -= chunk;
    }
}
470 
/****************************
 * Align start of function.
 *
 * Non-Windows targets always pad to an 8 byte boundary. Windows targets pad
 * only when optimizing for speed on a 486 or PentiumPro-and-later CPU, and
 * only when fewer than 8 bytes are needed to reach a 16 byte boundary.
 * Params:
 *      seg = segment of function
 */
void cod3_align(int seg)
{
    if (!(config.exe & EX_windos))
    {
        const pad = -Offset(seg) & 7;
        cod3_align_bytes(seg, pad);
        return;
    }

    // Windows: only bother when optimized for speed
    if (!(config.flags4 & CFG4speed))
        return;

    // Pick alignment based on CPU target
    if (config.target_cpu != TARGET_80486 &&
        config.target_cpu < TARGET_PentiumPro)
        return;

    // 486 does reads on 16 byte boundaries, so if we are near
    // such a boundary, align us to it
    const pad = -Offset(seg) & 15;
    if (pad < 8)
        cod3_align_bytes(seg, pad);
}
500 
501 
/**********************************
 * Generate code to adjust the stack pointer by `nbytes`.
 *
 * A positive `nbytes` emits SUB ESP,nbytes; zero or a negative value emits
 * ADD ESP,-nbytes.
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to adjust stack pointer
 */
void cod3_stackadj(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackadj(%d)\n", nbytes);
    uint grex = I64 ? REX_W << 16 : 0;

    const bool subtract = nbytes > 0;
    const uint rm = subtract
        ? modregrm(3,5,SP)      // SUB ESP,nbytes
        : modregrm(3,0,SP);     // ADD ESP,nbytes
    if (!subtract)
        nbytes = -nbytes;

    cdb.genc2(0x81, grex | rm, nbytes);
}
522 
/**********************************
 * Generate code to align the stack pointer at `nbytes`
 * by emitting AND ESP,-nbytes.
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to align stack pointer
 */
void cod3_stackalign(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackalign(%d)\n", nbytes);
    uint grex = 0;
    if (I64)
        grex = REX_W << 16;

    // AND ESP,-nbytes
    cdb.genc2(0x81, grex | modregrm(3, 4, SP), -nbytes);
}
536 
/* Constructor that links the ModuleReference to the head of
 * the list pointed to by _Dmoduleref
 *
 * For ELF object files.
 *
 * Disabled: the whole definition sits inside `static if (0)` and is
 * not compiled.
 *
 * Params:
 *      buf = buffer the machine code bytes are appended to
 *      codeOffset = offset used for the relocations; advanced locally as
 *                   bytes and relocations are emitted
 *      refOffset = offset of the ModuleReference, passed on to Obj.writerel()
 */
static if (0)
{
void cod3_buildmodulector(Outbuffer* buf, int codeOffset, int refOffset)
{
    /*      ret
     * codeOffset:
     *      pushad
     *      mov     EAX,&ModuleReference
     *      mov     ECX,_DmoduleRef
     *      mov     EDX,[ECX]
     *      mov     [EAX],EDX
     *      mov     [ECX],EAX
     *      popad
     *      ret
     */

    const int seg = CODE;

    if (I64 && config.flags3 & CFG3pic)
    {   // LEA RAX,ModuleReference[RIP]
        buf.writeByte(REX | REX_W);
        buf.writeByte(LEA);
        buf.writeByte(modregrm(0,AX,5));
        codeOffset += 3;
        codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_PC32, 3 /*STI_DATA*/, refOffset - 4);

        // MOV RCX,_DmoduleRef@GOTPCREL[RIP]
        buf.writeByte(REX | REX_W);
        buf.writeByte(0x8B);
        buf.writeByte(modregrm(0,CX,5));
        codeOffset += 3;
        codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_GOTPCREL, Obj.external_def("_Dmodule_ref"), -4);
    }
    else
    {
        /* movl ModuleReference*, %eax */
        buf.writeByte(0xB8);
        codeOffset += 1;
        const uint reltype = I64 ? R_X86_64_32 : R_386_32;
        codeOffset += Obj.writerel(seg, codeOffset, reltype, 3 /*STI_DATA*/, refOffset);

        /* movl _Dmodule_ref, %ecx */
        buf.writeByte(0xB9);
        codeOffset += 1;
        codeOffset += Obj.writerel(seg, codeOffset, reltype, Obj.external_def("_Dmodule_ref"), 0);
    }

    // The three moves below get a REX.W prefix byte when targeting I64
    if (I64)
        buf.writeByte(REX | REX_W);
    buf.writeByte(0x8B); buf.writeByte(0x11); /* movl (%ecx), %edx */
    if (I64)
        buf.writeByte(REX | REX_W);
    buf.writeByte(0x89); buf.writeByte(0x10); /* movl %edx, (%eax) */
    if (I64)
        buf.writeByte(REX | REX_W);
    buf.writeByte(0x89); buf.writeByte(0x01); /* movl %eax, (%ecx) */

    buf.writeByte(0xC3); /* ret */
}
}
602 
/*****************************
 * Given a type, return a mask of
 * registers to hold that type.
 * Params:
 *      tym = type to find registers for; only tybasic(tym) is examined
 *      tyf = function type; only consulted for TYcfloat
 * Returns:
 *      register mask, 0 for void, struct and array types
 */

regm_t regmask(tym_t tym, tym_t tyf)
{
    switch (tybasic(tym))
    {
        // Types that get no register
        case TYvoid:
        case TYstruct:
        case TYarray:
            return 0;

        // Types that always go in AX
        case TYbool:
        case TYwchar_t:
        case TYchar16:
        case TYchar:
        case TYschar:
        case TYuchar:
        case TYshort:
        case TYushort:
        case TYint:
        case TYuint:
        case TYnullptr:
        case TYnptr:
        case TYnref:
        case TYsptr:
        case TYcptr:
        case TYimmutPtr:
        case TYsharePtr:
        case TYrestrictPtr:
        case TYfgPtr:
            return mAX;

        // Types that go in AX, or in DX:AX for I16
        case TYlong:
        case TYulong:
        case TYdchar:
            return I16 ? mDX | mAX : mAX;

        // Types that always go in DX:AX
        case TYfptr:
        case TYhptr:
            return mDX | mAX;

        case TYcent:
        case TYucent:
            assert(I64);
            return mDX | mAX;

        case TYvptr:
            return mDX | mBX;

        case TYllong:
        case TYullong:
            if (I64)
                return mAX;
            if (I32)
                return mDX | mAX;
            return DOUBLEREGS;

        // Real and imaginary floating point types
        case TYfloat:
        case TYifloat:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            // otherwise treated like TYlong
            return I16 ? mDX | mAX : mAX;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            return DOUBLEREGS;

        case TYldouble:
        case TYildouble:
            return mST0;

        // Complex floating point types
        case TYcfloat:
            if ((config.exe & EX_posix) && I32 && tybasic(tyf) == TYnfunc)
                return mDX | mAX;
            // otherwise treated like TYcdouble
            if (I64)
                return mXMM0 | mXMM1;
            return mST01;

        case TYcdouble:
            if (I64)
                return mXMM0 | mXMM1;
            return mST01;

        case TYcldouble:
            return mST01;

        // SIMD vector types
        case TYfloat4:
        case TYdouble2:
        case TYschar16:
        case TYuchar16:
        case TYshort8:
        case TYushort8:
        case TYlong4:
        case TYulong4:
        case TYllong2:
        case TYullong2:

        case TYfloat8:
        case TYdouble4:
        case TYschar32:
        case TYuchar32:
        case TYshort16:
        case TYushort16:
        case TYlong8:
        case TYulong8:
        case TYllong4:
        case TYullong4:
            if (config.fpxmmregs)
                return mXMM0;
            printf("SIMD operations not supported on this platform\n");
            exit(1);
            return mXMM0;

        default:
            debug WRTYxx(tym);
            assert(0);
    }
}
730 
/*******************************
 * setup register allocator parameters with platform specific data
 * Params:
 *      dst_integer_reg = set to AX
 *      dst_float_reg = set to XMM0
 */
void cgreg_dst_regs(reg_t* dst_integer_reg, reg_t* dst_float_reg)
{
    *dst_integer_reg = AX;
    *dst_float_reg   = XMM0;
}
739 
/*******************************
 * Select the order in which registers are tried for a value of type `ty`.
 *
 * Every sequence is terminated by NOREG.
 * Params:
 *      ty = type of the value
 *      pseq = set to the register sequence for the value
 *      pseqmsw = set to a second sequence only when `ty` is not an XMM type
 *                and its size is twice REGSIZE (I64 and I32 targets);
 *                left untouched in every other case
 *                (presumably msw/lsw = most/least significant word -- confirm)
 */
void cgreg_set_priorities(tym_t ty, const(reg_t)** pseq, const(reg_t)** pseqmsw)
{
    const sz = tysize(ty);

    if (tyxmmreg(ty))
    {
        static immutable ubyte[9] sequence = [XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,NOREG];
        *pseq = sequence.ptr;
    }
    else if (I64)
    {
        if (sz == REGSIZE * 2)
        {
            static immutable ubyte[3] seqmsw1 = [CX,DX,NOREG];
            static immutable ubyte[5] seqlsw1 = [AX,BX,SI,DI,NOREG];
            *pseq = seqlsw1.ptr;
            *pseqmsw = seqmsw1.ptr;
        }
        else
        {   // R10 is reserved for the static link
            static immutable ubyte[15] sequence2 = [AX,CX,DX,SI,DI,R8,R9,R11,BX,R12,R13,R14,R15,BP,NOREG];
            // No cast needed: immutable(ubyte)* converts implicitly to
            // const(reg_t)*, exactly as in the other branches
            *pseq = sequence2.ptr;
        }
    }
    else if (I32)
    {
        if (sz == REGSIZE * 2)
        {
            static immutable ubyte[5] seqlsw3 = [AX,BX,SI,DI,NOREG];
            static immutable ubyte[3] seqmsw3 = [CX,DX,NOREG];
            *pseq = seqlsw3.ptr;
            *pseqmsw = seqmsw3.ptr;
        }
        else
        {
            static immutable ubyte[8] sequence4 = [AX,CX,DX,BX,SI,DI,BP,NOREG];
            *pseq = sequence4.ptr;
        }
    }
    else
    {   assert(I16);
        if (typtr(ty))
        {
            // For pointer types, try to pick index register first
            static immutable ubyte[8] seqidx5 = [BX,SI,DI,AX,CX,DX,BP,NOREG];
            *pseq = seqidx5.ptr;
        }
        else
        {
            // Otherwise, try to pick index registers last
            static immutable ubyte[8] sequence6 = [AX,CX,DX,BX,SI,DI,BP,NOREG];
            *pseq = sequence6.ptr;
        }
    }
}
795 
/*******************************************
 * Call finally block.
 *
 * Emits code that saves `retregs`, pads the stack when STACKALIGN is at
 * least 16 and the pushed bytes plus one more REGSIZE are not a multiple
 * of it, calls `bf`, removes the padding and restores `retregs`.
 * Also sets `calledFinally` and clears regcon.immed.mval.
 * Params:
 *      bf = block to call
 *      retregs = registers to preserve across call
 * Returns:
 *      code generated
 */
private code *callFinallyBlock(block *bf, regm_t retregs)
{
    CodeBuilder cdbSave;    cdbSave.ctor();
    CodeBuilder cdbRestore; cdbRestore.ctor();

    calledFinally = true;
    uint npush = gensaverestore(retregs, cdbSave, cdbRestore);

    int nalign = 0;
    if (STACKALIGN >= 16)
    {
        npush += REGSIZE;
        const misalign = npush & (STACKALIGN - 1);
        if (misalign)
        {
            nalign = STACKALIGN - misalign;
            cod3_stackadj(cdbSave, nalign);
        }
    }

    // CALL bf
    cdbSave.genc(0xE8, 0, 0, 0, FLblock, cast(targ_size_t) bf);
    regcon.immed.mval = 0;

    // Undo the alignment padding, if any
    if (nalign != 0)
        cod3_stackadj(cdbSave, -nalign);

    cdbSave.append(cdbRestore);
    return cdbSave.finish();
}
827 
828 /*******************************
829  * Generate block exit code
830  */
831 void outblkexitcode(ref CodeBuilder cdb, block *bl, ref int anyspill, const(char)* sflsave, Symbol** retsym, const regm_t mfuncregsave)
832 {
833     CodeBuilder cdb2; cdb2.ctor();
834     elem *e = bl.Belem;
835     block *nextb;
836     regm_t retregs = 0;
837 
838     if (bl.BC != BCasm)
839         assert(bl.Bcode == null);
840 
841     switch (bl.BC)                     /* block exit condition         */
842     {
843         case BCiftrue:
844         {
845             bool jcond = true;
846             block *bs1 = bl.nthSucc(0);
847             block *bs2 = bl.nthSucc(1);
848             if (bs1 == bl.Bnext)
849             {   // Swap bs1 and bs2
850                 block *btmp;
851 
852                 jcond ^= 1;
853                 btmp = bs1;
854                 bs1 = bs2;
855                 bs2 = btmp;
856             }
857             logexp(cdb,e,jcond,FLblock,cast(code *) bs1);
858             nextb = bs2;
859         }
860         L5:
861             if (configv.addlinenumbers && bl.Bsrcpos.Slinnum &&
862                 !(funcsym_p.ty() & mTYnaked))
863             {
864                 //printf("BCiftrue: %s(%u)\n", bl.Bsrcpos.Sfilename ? bl.Bsrcpos.Sfilename : "", bl.Bsrcpos.Slinnum);
865                 cdb.genlinnum(bl.Bsrcpos);
866             }
867             if (nextb != bl.Bnext)
868             {
869                 assert(!(bl.Bflags & BFLepilog));
870                 genjmp(cdb,JMP,FLblock,nextb);
871             }
872             break;
873 
874         case BCjmptab:
875         case BCifthen:
876         case BCswitch:
877         {
878             assert(!(bl.Bflags & BFLepilog));
879             doswitch(cdb,bl);               // hide messy details
880             break;
881         }
882 version (MARS)
883 {
884         case BCjcatch:          // D catch clause of try-catch
885             assert(ehmethod(funcsym_p) != EHmethod.EH_NONE);
886             // Mark all registers as destroyed. This will prevent
887             // register assignments to variables used in catch blocks.
888             getregs(cdb,lpadregs());
889 
890             if (config.ehmethod == EHmethod.EH_DWARF)
891             {
892                 /* Each block must have ESP set to the same value it was at the end
893                  * of the prolog. But the unwinder calls catch blocks with ESP set
894                  * at the value it was when the throwing function was called, which
895                  * may have arguments pushed on the stack.
896                  * This instruction will reset ESP to the correct offset from EBP.
897                  */
898                 cdb.gen1(ESCAPE | ESCfixesp);
899             }
900             goto case_goto;
901 }
902 version (SCPP)
903 {
904         case BCcatch:           // C++ catch clause of try-catch
905             // Mark all registers as destroyed. This will prevent
906             // register assignments to variables used in catch blocks.
907             getregs(cdb,allregs | mES);
908             goto case_goto;
909 
910         case BCtry:
911             usednteh |= EHtry;
912             if (config.exe == EX_WIN32)
913                 usednteh |= NTEHtry;
914             goto case_goto;
915 }
916         case BCgoto:
917             nextb = bl.nthSucc(0);
918             if ((MARS ||
919                  funcsym_p.Sfunc.Fflags3 & Fnteh) &&
920                 ehmethod(funcsym_p) != EHmethod.EH_DWARF &&
921                 bl.Btry != nextb.Btry &&
922                 nextb.BC != BC_finally)
923             {
924                 regm_t retregsx = 0;
925                 gencodelem(cdb,e,&retregsx,true);
926                 int toindex = nextb.Btry ? nextb.Btry.Bscope_index : -1;
927                 assert(bl.Btry);
928                 int fromindex = bl.Btry.Bscope_index;
929 version (MARS)
930 {
931                 if (toindex + 1 == fromindex)
932                 {   // Simply call __finally
933                     if (bl.Btry &&
934                         bl.Btry.nthSucc(1).BC == BCjcatch)
935                     {
936                         goto L5;        // it's a try-catch, not a try-finally
937                     }
938                 }
939 }
940                 if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) ||
941                     config.ehmethod == EHmethod.EH_SEH)
942                 {
943                     nteh_unwind(cdb,0,toindex);
944                 }
945                 else
946                 {
947 version (MARS)
948 {
949                 if (toindex + 1 <= fromindex)
950                 {
951                     //c = cat(c, linux_unwind(0, toindex));
952                     block *bt;
953 
954                     //printf("B%d: fromindex = %d, toindex = %d\n", bl.Bdfoidx, fromindex, toindex);
955                     bt = bl;
956                     while ((bt = bt.Btry) != null && bt.Bscope_index != toindex)
957                     {   block *bf;
958 
959                         //printf("\tbt.Bscope_index = %d, bt.Blast_index = %d\n", bt.Bscope_index, bt.Blast_index);
960                         bf = bt.nthSucc(1);
961                         // Only look at try-finally blocks
962                         if (bf.BC == BCjcatch)
963                             continue;
964 
965                         if (bf == nextb)
966                             continue;
967                         //printf("\tbf = B%d, nextb = B%d\n", bf.Bdfoidx, nextb.Bdfoidx);
968                         if (nextb.BC == BCgoto &&
969                             !nextb.Belem &&
970                             bf == nextb.nthSucc(0))
971                             continue;
972 
973                         // call __finally
974                         cdb.append(callFinallyBlock(bf.nthSucc(0), retregsx));
975                     }
976                 }
977 }
978                 }
979                 goto L5;
980             }
981         case_goto:
982         {
983             regm_t retregsx = 0;
984             gencodelem(cdb,e,&retregsx,true);
985             if (anyspill)
986             {   // Add in the epilog code
987                 CodeBuilder cdbstore; cdbstore.ctor();
988                 CodeBuilder cdbload;  cdbload.ctor();
989 
990                 for (int i = 0; i < anyspill; i++)
991                 {   Symbol *s = globsym[i];
992 
993                     if (s.Sflags & SFLspill &&
994                         vec_testbit(dfoidx,s.Srange))
995                     {
996                         s.Sfl = sflsave[i];    // undo block register assignments
997                         cgreg_spillreg_epilog(bl,s,cdbstore,cdbload);
998                     }
999                 }
1000                 cdb.append(cdbstore);
1001                 cdb.append(cdbload);
1002             }
1003             nextb = bl.nthSucc(0);
1004             goto L5;
1005         }
1006 
1007         case BC_try:
1008             if (config.ehmethod == EHmethod.EH_NONE || funcsym_p.Sfunc.Fflags3 & Feh_none)
1009             {
1010                 /* Need to use frame pointer to access locals, not the stack pointer,
1011                  * because we'll be calling the BC_finally blocks and the stack will be off.
1012                  */
1013                 needframe = 1;
1014             }
1015             else if (config.ehmethod == EHmethod.EH_SEH || config.ehmethod == EHmethod.EH_WIN32)
1016             {
1017                 usednteh |= NTEH_try;
1018                 nteh_usevars();
1019             }
1020             else
1021                 usednteh |= EHtry;
1022             goto case_goto;
1023 
1024         case BC_finally:
1025             if (ehmethod(funcsym_p) == EHmethod.EH_DWARF)
1026             {
1027                 // Mark scratch registers as destroyed.
1028                 getregsNoSave(lpadregs());
1029 
1030                 regm_t retregsx = 0;
1031                 gencodelem(cdb,bl.Belem,&retregsx,true);
1032 
1033                 // JMP bl.nthSucc(1)
1034                 nextb = bl.nthSucc(1);
1035 
1036                 goto L5;
1037             }
1038             else
1039             {
1040                 if (config.ehmethod == EHmethod.EH_SEH ||
1041                     config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none))
1042                 {
1043                     // Mark all registers as destroyed. This will prevent
1044                     // register assignments to variables used in finally blocks.
1045                     getregsNoSave(lpadregs());
1046                 }
1047 
1048                 assert(!e);
1049                 // Generate CALL to finalizer code
1050                 cdb.append(callFinallyBlock(bl.nthSucc(0), 0));
1051 
1052                 // JMP bl.nthSucc(1)
1053                 nextb = bl.nthSucc(1);
1054 
1055                 goto L5;
1056             }
1057 
1058         case BC_lpad:
1059         {
1060             assert(ehmethod(funcsym_p) == EHmethod.EH_DWARF);
1061             // Mark all registers as destroyed. This will prevent
1062             // register assignments to variables used in finally blocks.
1063             getregsNoSave(lpadregs());
1064 
1065             regm_t retregsx = 0;
1066             gencodelem(cdb,bl.Belem,&retregsx,true);
1067 
1068             // JMP bl.nthSucc(0)
1069             nextb = bl.nthSucc(0);
1070             goto L5;
1071         }
1072 
1073         case BC_ret:
1074         {
1075             regm_t retregsx = 0;
1076             gencodelem(cdb,e,&retregsx,true);
1077             if (ehmethod(funcsym_p) == EHmethod.EH_DWARF)
1078             {
1079             }
1080             else
1081                 cdb.gen1(0xC3);   // RET
1082             break;
1083         }
1084 
1085 static if (NTEXCEPTIONS)
1086 {
1087         case BC_except:
1088         {
1089             assert(!e);
1090             usednteh |= NTEH_except;
1091             nteh_setsp(cdb,0x8B);
1092             getregsNoSave(allregs);
1093             nextb = bl.nthSucc(0);
1094             goto L5;
1095         }
1096         case BC_filter:
1097         {
1098             nteh_filter(cdb, bl);
1099             // Mark all registers as destroyed. This will prevent
1100             // register assignments to variables used in filter blocks.
1101             getregsNoSave(allregs);
1102             regm_t retregsx = regmask(e.Ety, TYnfunc);
1103             gencodelem(cdb,e,&retregsx,true);
1104             cdb.gen1(0xC3);   // RET
1105             break;
1106         }
1107 }
1108 
1109         case BCretexp:
1110             reg_t reg1, reg2, lreg, mreg;
1111             retregs = allocretregs(e.Ety, e.ET, funcsym_p.ty(), reg1, reg2);
1112 
1113             lreg = mreg = NOREG;
1114             if (reg1 == NOREG)
1115             {}
1116             else if (tybasic(e.Ety) == TYcfloat)
1117                 lreg = ST01;
1118             else if (mask(reg1) & (mST0 | mST01))
1119                 lreg = reg1;
1120             else if (reg2 == NOREG)
1121                 lreg = reg1;
1122             else if (mask(reg1) & XMMREGS)
1123             {
1124                 lreg = XMM0;
1125                 mreg = XMM1;
1126             }
1127             else
1128             {
1129                 lreg = mask(reg1) & mLSW ? reg1 : AX;
1130                 mreg = mask(reg2) & mMSW ? reg2 : DX;
1131             }
1132             if (reg1 != NOREG)
1133                 retregs = (mask(lreg) | mask(mreg)) & ~mask(NOREG);
1134 
1135             // For the final load into the return regs, don't set regcon.used,
1136             // so that the optimizer can potentially use retregs for register
1137             // variable assignments.
1138 
1139             if (config.flags4 & CFG4optimized)
1140             {   regm_t usedsave;
1141 
1142                 docommas(cdb,&e);
1143                 usedsave = regcon.used;
1144                 if (!OTleaf(e.Eoper))
1145                     gencodelem(cdb,e,&retregs,true);
1146                 else
1147                 {
1148                     if (e.Eoper == OPconst)
1149                         regcon.mvar = 0;
1150                     gencodelem(cdb,e,&retregs,true);
1151                     regcon.used = usedsave;
1152                     if (e.Eoper == OPvar)
1153                     {   Symbol *s = e.EV.Vsym;
1154 
1155                         if (s.Sfl == FLreg && s.Sregm != mAX)
1156                             *retsym = s;
1157                     }
1158                 }
1159             }
1160             else
1161             {
1162                 gencodelem(cdb,e,&retregs,true);
1163             }
1164 
1165             if (reg1 == NOREG)
1166             {
1167             }
1168             else if ((mask(reg1) | mask(reg2)) & (mST0 | mST01))
1169             {
1170                 assert(reg1 == lreg && reg2 == NOREG);
1171                 regm_t pretregs = mask(reg1) | mask(reg2);
1172                 fixresult87(cdb, e, retregs, &pretregs, true);
1173             }
1174             // fix return registers
1175             else if (tybasic(e.Ety) == TYcfloat)
1176             {
1177                 assert(lreg == ST01);
1178                 if (I64)
1179                 {
1180                     assert(reg2 == NOREG);
1181                     // spill
1182                     pop87();
1183                     pop87();
1184                     cdb.genfltreg(0xD9, 3, tysize(TYfloat));
1185                     genfwait(cdb);
1186                     cdb.genfltreg(0xD9, 3, 0);
1187                     genfwait(cdb);
1188                     // reload
1189                     if (config.exe == EX_WIN64)
1190                     {
1191                         assert(reg1 == AX);
1192                         cdb.genfltreg(LOD, reg1, 0);
1193                         code_orrex(cdb.last(), REX_W);
1194                     }
1195                     else
1196                     {
1197                         assert(reg1 == XMM0);
1198                         cdb.genxmmreg(xmmload(TYdouble), reg1, 0, TYdouble);
1199                     }
1200                 }
1201                 else
1202                 {
1203                     assert(reg1 == AX && reg2 == DX);
1204                     regm_t pretregs = mask(reg1) | mask(reg2);
1205                     fixresult_complex87(cdb, e, retregs, &pretregs, true);
1206                 }
1207             }
1208             else if (reg2 == NOREG)
1209                 assert(lreg == reg1);
1210             else for (int v = 0; v < 2; v++)
1211             {
1212                 if (v ^ (reg1 != mreg))
1213                     genmovreg(cdb, reg1, lreg);
1214                 else
1215                     genmovreg(cdb, reg2, mreg);
1216             }
1217             if (reg1 != NOREG)
1218                 retregs = (mask(reg1) | mask(reg2)) & ~mask(NOREG);
1219             goto L4;
1220 
1221         case BCret:
1222             retregs = 0;
1223             gencodelem(cdb,e,&retregs,true);
1224         L4:
1225             if (retregs == mST0)
1226             {   assert(global87.stackused == 1);
1227                 pop87();                // account for return value
1228             }
1229             else if (retregs == mST01)
1230             {   assert(global87.stackused == 2);
1231                 pop87();
1232                 pop87();                // account for return value
1233             }
1234 
1235             if (MARS || usednteh & NTEH_try)
1236             {
1237                 block *bt = bl;
1238                 while ((bt = bt.Btry) != null)
1239                 {
1240                     block *bf = bt.nthSucc(1);
1241 version (MARS)
1242 {
1243                     // Only look at try-finally blocks
1244                     if (bf.BC == BCjcatch)
1245                     {
1246                         continue;
1247                     }
1248 }
1249                     if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) ||
1250                         config.ehmethod == EHmethod.EH_SEH)
1251                     {
1252                         if (bt.Bscope_index == 0)
1253                         {
1254                             // call __finally
1255                             CodeBuilder cdbs; cdbs.ctor();
1256                             CodeBuilder cdbr; cdbr.ctor();
1257 
1258                             nteh_gensindex(cdb,-1);
1259                             gensaverestore(retregs,cdbs,cdbr);
1260                             cdb.append(cdbs);
1261                             cdb.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf.nthSucc(0));
1262                             regcon.immed.mval = 0;
1263                             cdb.append(cdbr);
1264                         }
1265                         else
1266                         {
1267                             nteh_unwind(cdb,retregs,~0);
1268                         }
1269                         break;
1270                     }
1271                     else
1272                     {
1273                         // call __finally
1274                         cdb.append(callFinallyBlock(bf.nthSucc(0), retregs));
1275                     }
1276                 }
1277             }
1278             break;
1279 
1280         case BCexit:
1281             retregs = 0;
1282             gencodelem(cdb,e,&retregs,true);
1283             if (config.flags4 & CFG4optimized)
1284                 mfuncreg = mfuncregsave;
1285             break;
1286 
1287         case BCasm:
1288         {
1289             assert(!e);
1290             // Mark destroyed registers
1291             CodeBuilder cdbx; cdbx.ctor();
1292             getregs(cdbx,iasm_regs(bl));         // mark destroyed registers
1293             code *c = cdbx.finish();
1294             if (bl.Bsucc)
1295             {   nextb = bl.nthSucc(0);
1296                 if (!bl.Bnext)
1297                 {
1298                     cdb.append(bl.Bcode);
1299                     cdb.append(c);
1300                     goto L5;
1301                 }
1302                 if (nextb != bl.Bnext &&
1303                     bl.Bnext &&
1304                     !(bl.Bnext.BC == BCgoto &&
1305                      !bl.Bnext.Belem &&
1306                      nextb == bl.Bnext.nthSucc(0)))
1307                 {
1308                     // See if already have JMP at end of block
1309                     code *cl = code_last(bl.Bcode);
1310                     if (!cl || cl.Iop != JMP)
1311                     {
1312                         cdb.append(bl.Bcode);
1313                         cdb.append(c);
1314                         goto L5;        // add JMP at end of block
1315                     }
1316                 }
1317             }
1318             cdb.append(bl.Bcode);
1319             break;
1320         }
1321 
1322         default:
1323             debug
1324             printf("bl.BC = %d\n",bl.BC);
1325             assert(0);
1326     }
1327 }
1328 
/***************************
 * Choose the register(s) in which a function's return value is delivered.
 *
 * For non-Posix targets the answer is simply regmask(ty, tyf) and
 * reg1/reg2 are left as NOREG. For Posix targets the return type is
 * reduced to at most two part types, and each part is then given a
 * register according to its size and kind.
 *
 * Params:
 *    ty    = return type
 *    t     = return type extended info (consulted for TYstruct and TYarray)
 *    tyf   = function type
 *    reg1  = set to the first part register, else NOREG
 *    reg2  = set to the second part register, else NOREG
 *
 * Returns:
 *    a bit mask of return registers.
 *    0 if function returns on the stack or returns void.
 */
regm_t allocretregs(const tym_t ty, type* t, const tym_t tyf, out reg_t reg1, out reg_t reg2)
{
    //printf("allocretregs()\n");
    reg1 = NOREG;
    reg2 = NOREG;

    if (!(config.exe & EX_posix))
        return regmask(ty, tyf);    // for non-Posix ABI

    /* Everything from here on is for the Itanium ABI
     */

    const tyb = tybasic(ty);
    if (tyb == TYvoid)
        return 0;

    tym_t tyFirst  = tyb;       // type of the first returned part
    tym_t tySecond = TYMAX;     // TYMAX means "there is no second part"

    if (ty & mTYxmmgpr)
    {
        tyFirst  = TYdouble;
        tySecond = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        tyFirst  = TYllong;
        tySecond = TYdouble;
    }

    if (tyb == TYstruct)
    {
        assert(t);
        tyFirst = t.Tty;
    }

    const tyfb = tybasic(tyf);
    switch (tyrelax(tyFirst))
    {
        case TYcent:
            if (I32)
                return 0;
            tyFirst = tySecond = TYllong;
            break;

        case TYcdouble:
            if (I32)
            {
                if (tyfb == TYjfunc)
                    break;              // part type is left as is
                return 0;
            }
            tyFirst = tySecond = TYdouble;
            break;

        case TYcfloat:
            if (I32)
            {
                if (tyfb == TYjfunc)
                    break;              // part type is left as is
                goto case TYllong;      // otherwise handled like a long long
            }
            tyFirst = TYdouble;
            break;

        case TYcldouble:
            if (I32 && tyfb != TYjfunc)
                return 0;
            break;

        case TYllong:
            if (I32)
                tyFirst = tySecond = TYlong;
            break;

        case TYarray:
        {
            // Ask argtypes() how the array decomposes into parts
            type* part1, part2;
            argtypes(t, part1, part2);
            if (!part1)
                return 0;
            tyFirst = part1.Tty;
            if (part2)
                tySecond = part2.Tty;
            break;
        }

        case TYstruct:
            assert(t);
            if (!I64)
                return 0;
            assert(tybasic(t.Tty) == TYstruct);
            // Use the part types recorded on the struct symbol
            if (const part1 = t.Ttag.Sstruct.Sarg1type)
                tyFirst = part1.Tty;
            else
                return 0;
            if (const part2 = t.Ttag.Sstruct.Sarg2type)
                tySecond = part2.Tty;
            break;

        default:
            break;
    }

    /* tyFirst and tySecond are now settled; map each of them onto
     * a concrete register
     */

    // Hands out AX, DX and XMM0, XMM1 in that order
    static struct ReturnRegPool
    {
    nothrow:
        static immutable reg_t[2] gprOrder = [AX, DX];
        static immutable reg_t[2] xmmOrder = [XMM0, XMM1];

        uint gprTaken = 0,
             xmmTaken = 0;

        reg_t takeGpr() { return gprOrder[gprTaken++]; }
        reg_t takeXmm() { return xmmOrder[xmmTaken++]; }
    }

    ReturnRegPool pool;

    // Register for one part of the return value; NOREG for an absent part
    reg_t regForPart(tym_t tym)
    {
        if (tym == TYMAX)
            return NOREG;

        const size = tysize(tym);

        if (size == 8)
        {
            if (tycomplex(tym))
            {
                assert(tyfb == TYjfunc && I32);
                return ST01;
            }
            assert(I64 || tyfloating(tym));
        }

        if (size == 1 || size == 2 || size == 4 || size == 8)
        {
            if (!tyfloating(tym))
                return pool.takeGpr();
            return I64 ? pool.takeXmm() : ST0;
        }

        // Remaining sizes: x87 long double flavours, complex, SIMD vectors
        const tymb = tybasic(tym);
        if (tymb == TYldouble || tymb == TYildouble)
            return ST0;
        if (tymb == TYcldouble)
            return ST01;
        if (tycomplex(tym) && tyfb == TYjfunc && I32)
            return ST01;
        if (tysimd(tym))
            return pool.takeXmm();

        debug WRTYxx(tym);
        assert(0);
    }

    // The first part must be allocated before the second
    reg1 = regForPart(tyFirst);
    reg2 = regForPart(tySecond);

    return (mask(reg1) | mask(reg2)) & ~mask(NOREG);
}
1515 
/***********************************************
 * A switch case value paired with its branch target, together with
 * the ordering callback used to sort an array of them with qsort().
 */

alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

extern (C)  // qsort cmp functions need to be "C"
{
struct CaseVal
{
    targ_ullong val;    // case value; ordered as an unsigned quantity
    block *target;      // block branched to when the switch value equals val

    /* Ordering callback for qsort(): negative, zero or positive as the
     * first case value is below, equal to or above the second.
     */
    extern (C) static nothrow int cmp(scope const(void*) p, scope const(void*) q)
    {
        const targ_ullong lhs = (cast(const(CaseVal)*)p).val;
        const targ_ullong rhs = (cast(const(CaseVal)*)q).val;

        if (lhs < rhs)
            return -1;
        return (lhs > rhs) ? 1 : 0;
    }
}
}
1539 
/***
 * Generate comparison of [reg2,reg] with val
 *
 * Params:
 *    cdb  = code builder receiving the instructions
 *    val  = value to compare against
 *    sz   = size in bytes of the value being switched on
 *    reg  = register holding the value (its low part when reg2 is in use)
 *    reg2 = register holding the high part, or NOREG
 *    sreg = scratch register; must not be NOREG when a 64 bit val is not
 *           the sign extension of a 32 bit value
 */
private void cmpval(ref CodeBuilder cdb, targ_llong val, uint sz, reg_t reg, reg_t reg2, reg_t sreg)
{
    const targ_size_t imm = cast(targ_size_t)val;

    if (I64 && sz == 8)
    {
        assert(reg2 == NOREG);

        if (val != cast(int)val)
        {
            // val cannot be encoded as a sign-extended imm32: go through sreg
            assert(sreg != NOREG);
            movregconst(cdb,sreg,imm,64);               // MOV sreg,val64
            genregs(cdb,0x3B,reg,sreg);                 // CMP reg,sreg
            code_orrex(cdb.last(), REX_W);
            getregsNoSave(mask(sreg));                  // don't remember we loaded this constant
            return;
        }

        cdb.genc2(0x81,modregrmx(3,7,reg),imm);         // CMP reg,value32
        cdb.last().Irex |= REX_W;                       // 64 bit operand
        return;
    }

    if (reg2 == NOREG)
    {
        cdb.genc2(0x81,modregrmx(3,7,reg),imm);         // CMP reg,casevalue
        return;
    }

    /* Register pair: compare the high parts first, and compare the low
     * parts only if the high parts were equal.
     */
    cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val));  // CMP reg2,MSREG(casevalue)
    code *done = gennop(null);
    genjmp(cdb,JNE,FLcode,cast(block *) done);          // JNE done
    cdb.genc2(0x81,modregrm(3,7,reg),imm);              // CMP reg,casevalue
    cdb.append(done);
}
1573 
/***
 * Generate the compare-and-branch code that dispatches on a set of case values.
 *
 * With fewer than 4 cases, or when not compiling for speed, a straight
 * sequence of CMP/JE is generated. Otherwise the middle case is tested
 * first and the two halves are handled by recursive calls, which relies
 * on casevals[] being in ascending unsigned order.
 *
 * Params:
 *    cdb      = code builder receiving the instructions
 *    casevals = case value / target pairs
 *    ncases   = number of entries in casevals
 *    sz       = size in bytes of the value being switched on
 *    reg      = register holding the value (its low part when reg2 is in use)
 *    reg2     = register holding the high part, or NOREG
 *    sreg     = scratch register handed on to cmpval(), or NOREG
 *    bdefault = block to go to when no case matches
 *    last     = if true, finish the sequence with a JMP to bdefault
 */
private void ifthen(ref CodeBuilder cdb, CaseVal *casevals, size_t ncases,
        uint sz, reg_t reg, reg_t reg2, reg_t sreg, block *bdefault, bool last)
{
    if (ncases < 4 || !(config.flags4 & CFG4speed))
    {
        // Not worth doing a binary search, just do a sequence of CMP/JE
        foreach (ref entry; casevals[0 .. ncases])
        {
            targ_llong val = entry.val;
            cmpval(cdb, val, sz, reg, reg2, sreg);

            code *skip = null;
            if (reg2 != NOREG)
            {
                skip = gennop(null);
                genjmp(cdb,JNE,FLcode,cast(block *) skip);      // JNE skip
                cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val));   // CMP reg2,MSREG(casevalue)
            }
            genjmp(cdb,JE,FLblock,entry.target);                // JE caseaddr
            cdb.append(skip);
        }

        if (last)       // if default is not next block
            genjmp(cdb,JMP,FLblock,bdefault);
        return;
    }

    const size_t mid = ncases >> 1;

    // Compares for casevals[0..mid]; always ends with a JMP to bdefault
    CodeBuilder lower; lower.ctor();
    ifthen(lower, casevals, mid, sz, reg, reg2, sreg, bdefault, true);

    // Compares for casevals[mid+1..ncases]
    CodeBuilder upper; upper.ctor();
    ifthen(upper, casevals + mid + 1, ncases - mid - 1, sz, reg, reg2, sreg, bdefault, last);
    code *upperStart = gennop(null);

    // Compare for casevals[mid]
    cmpval(cdb, casevals[mid].val, sz, reg, reg2, sreg);
    genjmp(cdb,JE,FLblock,casevals[mid].target);                // JE target
    // Unsigned jump here, as the cases were sorted using unsigned comparisons
    genjmp(cdb,JA,FLcode,cast(block *) upperStart);             // JA upperStart

    cdb.append(lower);
    cdb.append(upperStart);
    cdb.append(upper);
}
1621 
1622 /*******************************
1623  * Generate code for blocks ending in a switch statement.
1624  * Take BCswitch and decide on
1625  *      BCifthen        use if - then code
1626  *      BCjmptab        index into jump table
1627  *      BCswitch        search table for match
1628  */
1629 
1630 void doswitch(ref CodeBuilder cdb, block *b)
1631 {
1632     targ_ulong msw;
1633 
1634     // If switch tables are in code segment and we need a CS: override to get at them
1635     bool csseg = cast(bool)(config.flags & CFGromable);
1636 
1637     //printf("doswitch(%d)\n", b.BC);
1638     elem *e = b.Belem;
1639     elem_debug(e);
1640     docommas(cdb,&e);
1641     cgstate.stackclean++;
1642     tym_t tys = tybasic(e.Ety);
1643     int sz = _tysize[tys];
1644     bool dword = (sz == 2 * REGSIZE);
1645     bool mswsame = true;                // assume all msw's are the same
1646     targ_llong *p = b.Bswitch;          // pointer to case data
1647     assert(p);
1648     uint ncases = cast(uint)*p++;       // number of cases
1649 
1650     targ_llong vmax = MINLL;            // smallest possible llong
1651     targ_llong vmin = MAXLL;            // largest possible llong
1652     for (uint n = 0; n < ncases; n++)   // find max and min case values
1653     {
1654         targ_llong val = *p++;
1655         if (val > vmax) vmax = val;
1656         if (val < vmin) vmin = val;
1657         if (REGSIZE == 2)
1658         {
1659             ushort ms = (val >> 16) & 0xFFFF;
1660             if (n == 0)
1661                 msw = ms;
1662             else if (msw != ms)
1663                 mswsame = 0;
1664         }
1665         else // REGSIZE == 4
1666         {
1667             targ_ulong ms = (val >> 32) & 0xFFFFFFFF;
1668             if (n == 0)
1669                 msw = ms;
1670             else if (msw != ms)
1671                 mswsame = 0;
1672         }
1673     }
1674     p -= ncases;
1675     //dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin);
1676 
1677     /* Three kinds of switch strategies - pick one
1678      */
1679     if (ncases <= 3)
1680         goto Lifthen;
1681     else if (I16 && cast(targ_ullong)(vmax - vmin) <= ncases * 2)
1682         goto Ljmptab;           // >=50% of the table is case values, rest is default
1683     else if (cast(targ_ullong)(vmax - vmin) <= ncases * 3)
1684         goto Ljmptab;           // >= 33% of the table is case values, rest is default
1685     else if (I16)
1686         goto Lswitch;
1687     else
1688         goto Lifthen;
1689 
1690     /*************************************************************************/
1691     {   // generate if-then sequence
1692     Lifthen:
1693         regm_t retregs = ALLREGS;
1694         b.BC = BCifthen;
1695         scodelem(cdb,e,&retregs,0,true);
1696         reg_t reg, reg2;
1697         if (dword)
1698         {   reg = findreglsw(retregs);
1699             reg2 = findregmsw(retregs);
1700         }
1701         else
1702         {
1703             reg = findreg(retregs);     // reg that result is in
1704             reg2 = NOREG;
1705         }
1706         list_t bl = b.Bsucc;
1707         block *bdefault = b.nthSucc(0);
1708         if (dword && mswsame)
1709         {
1710             cdb.genc2(0x81,modregrm(3,7,reg2),msw);   // CMP reg2,MSW
1711             genjmp(cdb,JNE,FLblock,bdefault);  // JNE default
1712             reg2 = NOREG;
1713         }
1714 
1715         reg_t sreg = NOREG;                          // may need a scratch register
1716 
1717         // Put into casevals[0..ncases] so we can sort then slice
1718         CaseVal *casevals = cast(CaseVal *)malloc(ncases * CaseVal.sizeof);
1719         assert(casevals);
1720         for (uint n = 0; n < ncases; n++)
1721         {
1722             casevals[n].val = p[n];
1723             bl = list_next(bl);
1724             casevals[n].target = list_block(bl);
1725 
1726             // See if we need a scratch register
1727             if (sreg == NOREG && I64 && sz == 8 && p[n] != cast(int)p[n])
1728             {   regm_t regm = ALLREGS & ~mask(reg);
1729                 allocreg(cdb,&regm, &sreg, TYint);
1730             }
1731         }
1732 
1733         // Sort cases so we can do a runtime binary search
1734         qsort(casevals, ncases, CaseVal.sizeof, &CaseVal.cmp);
1735 
1736         //for (uint n = 0; n < ncases; n++)
1737             //printf("casevals[%lld] = x%x\n", n, casevals[n].val);
1738 
1739         // Generate binary tree of comparisons
1740         ifthen(cdb, casevals, ncases, sz, reg, reg2, sreg, bdefault, bdefault != b.Bnext);
1741 
1742         free(casevals);
1743 
1744         cgstate.stackclean--;
1745         return;
1746     }
1747 
1748     /*************************************************************************/
1749     {
1750         // Use switch value to index into jump table
1751     Ljmptab:
1752         //printf("Ljmptab:\n");
1753 
1754         b.BC = BCjmptab;
1755 
1756         /* If vmin is small enough, we can just set it to 0 and the jump
1757          * table entries from 0..vmin-1 can be set with the default target.
1758          * This saves the SUB instruction.
1759          * Must be same computation as used in outjmptab().
1760          */
1761         if (vmin > 0 && vmin <= _tysize[TYint])
1762             vmin = 0;
1763 
1764         b.Btablesize = cast(int) (vmax - vmin + 1) * tysize(TYnptr);
1765         regm_t retregs = IDXREGS;
1766         if (dword)
1767             retregs |= mMSW;
1768         if (config.exe & EX_posix && I32 && config.flags3 & CFG3pic)
1769             retregs &= ~mBX;                            // need EBX for GOT
1770         bool modify = (I16 || I64 || vmin);
1771         scodelem(cdb,e,&retregs,0,!modify);
1772         reg_t reg = findreg(retregs & IDXREGS); // reg that result is in
1773         reg_t reg2;
1774         if (dword)
1775             reg2 = findregmsw(retregs);
1776         if (modify)
1777         {
1778             assert(!(retregs & regcon.mvar));
1779             getregs(cdb,retregs);
1780         }
1781         if (vmin)                       // if there is a minimum
1782         {
1783             cdb.genc2(0x81,modregrm(3,5,reg),cast(targ_size_t)vmin); // SUB reg,vmin
1784             if (dword)
1785             {   cdb.genc2(0x81,modregrm(3,3,reg2),cast(targ_size_t)MSREG(vmin)); // SBB reg2,vmin
1786                 genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
1787             }
1788         }
1789         else if (dword)
1790         {   gentstreg(cdb,reg2);              // TEST reg2,reg2
1791             genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
1792         }
1793         if (vmax - vmin != REGMASK)     // if there is a maximum
1794         {                               // CMP reg,vmax-vmin
1795             cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)(vmax-vmin));
1796             if (I64 && sz == 8)
1797                 code_orrex(cdb.last(), REX_W);
1798             genjmp(cdb,JA,FLblock,b.nthSucc(0));  // JA default
1799         }
1800         if (I64)
1801         {
1802             if (!vmin)
1803             {   // Need to clear out high 32 bits of reg
1804                 // Use 8B instead of 89, as 89 will be optimized away as a NOP
1805                 genregs(cdb,0x8B,reg,reg);                 // MOV reg,reg
1806             }
1807             if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
1808             {
1809                 /* LEA    R1,disp[RIP]          48 8D 05 00 00 00 00
1810                  * MOVSXD R2,[reg*4][R1]        48 63 14 B8
1811                  * LEA    R1,[R1][R2]           48 8D 04 02
1812                  * JMP    R1                    FF E0
1813                  */
1814                 reg_t r1;
1815                 regm_t scratchm = ALLREGS & ~mask(reg);
1816                 allocreg(cdb,&scratchm,&r1,TYint);
1817                 reg_t r2;
1818                 scratchm = ALLREGS & ~(mask(reg) | mask(r1));
1819                 allocreg(cdb,&scratchm,&r2,TYint);
1820 
1821                 CodeBuilder cdbe; cdbe.ctor();
1822                 cdbe.genc1(LEA,(REX_W << 16) | modregxrm(0,r1,5),FLswitch,0);        // LEA R1,disp[RIP]
1823                 cdbe.last().IEV1.Vswitch = b;
1824                 cdbe.gen2sib(0x63,(REX_W << 16) | modregxrm(0,r2,4), modregxrmx(2,reg,r1)); // MOVSXD R2,[reg*4][R1]
1825                 cdbe.gen2sib(LEA,(REX_W << 16) | modregxrm(0,r1,4),modregxrmx(0,r1,r2));    // LEA R1,[R1][R2]
1826                 cdbe.gen2(0xFF,modregrmx(3,4,r1));                                          // JMP R1
1827 
1828                 b.Btablesize = cast(int) (vmax - vmin + 1) * 4;
1829                 code *ce = cdbe.finish();
1830                 pinholeopt(ce, null);
1831 
1832                 cdb.append(cdbe);
1833             }
1834             else
1835             {
1836                 cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);   // JMP disp[reg*8]
1837                 cdb.last().IEV1.Vswitch = b;
1838                 cdb.last().Isib = modregrm(3,reg & 7,5);
1839                 if (reg & 8)
1840                     cdb.last().Irex |= REX_X;
1841             }
1842         }
1843         else if (I32)
1844         {
1845 static if (JMPJMPTABLE)
1846 {
1847             /* LEA jreg,offset ctable[reg][reg * 4]
1848                JMP jreg
1849               ctable:
1850                JMP case0
1851                JMP case1
1852                ...
1853              */
1854             CodeBuilder ctable; ctable.ctor();
1855             block *bdef = b.nthSucc(0);
1856             targ_llong u;
1857             for (u = vmin; ; u++)
1858             {   block *targ = bdef;
1859                 for (n = 0; n < ncases; n++)
1860                 {
1861                     if (p[n] == u)
1862                     {   targ = b.nthSucc(n + 1);
1863                         break;
1864                     }
1865                 }
1866                 genjmp(ctable,JMP,FLblock,targ);
1867                 ctable.last().Iflags |= CFjmp5;           // don't shrink these
1868                 if (u == vmax)
1869                     break;
1870             }
1871 
1872             // Allocate scratch register jreg
1873             regm_t scratchm = ALLREGS & ~mask(reg);
1874             uint jreg = AX;
1875             allocreg(cdb,&scratchm,&jreg,TYint);
1876 
1877             // LEA jreg, offset ctable[reg][reg*4]
1878             cdb.genc1(LEA,modregrm(2,jreg,4),FLcode,6);
1879             cdb.last().Isib = modregrm(2,reg,reg);
1880             cdb.gen2(0xFF,modregrm(3,4,jreg));      // JMP jreg
1881             cdb.append(ctable);
1882             b.Btablesize = 0;
1883             cgstate.stackclean--;
1884             return;
1885 }
1886 else
1887 {
1888         if (config.exe & (EX_OSX | EX_OSX64))
1889         {
1890             /*     CALL L1
1891              * L1: POP  R1
1892              *     ADD  R1,disp[reg*4][R1]
1893              *     JMP  R1
1894              */
1895             // Allocate scratch register r1
1896             regm_t scratchm = ALLREGS & ~mask(reg);
1897             reg_t r1;
1898             allocreg(cdb,&scratchm,&r1,TYint);
1899 
1900             cdb.genc2(CALL,0,0);                           //     CALL L1
1901             cdb.gen1(0x58 + r1);                           // L1: POP R1
1902             cdb.genc1(0x03,modregrm(2,r1,4),FLswitch,0);   // ADD R1,disp[reg*4][EBX]
1903             cdb.last().IEV1.Vswitch = b;
1904             cdb.last().Isib = modregrm(2,reg,r1);
1905             cdb.gen2(0xFF,modregrm(3,4,r1));               // JMP R1
1906         }
1907         else
1908         {
1909             if (config.flags3 & CFG3pic)
1910             {
1911                 /* MOV  R1,EBX
1912                  * SUB  R1,funcsym_p@GOTOFF[offset][reg*4][EBX]
1913                  * JMP  R1
1914                  */
1915 
1916                 // Load GOT in EBX
1917                 load_localgot(cdb);
1918 
1919                 // Allocate scratch register r1
1920                 regm_t scratchm = ALLREGS & ~(mask(reg) | mBX);
1921                 reg_t r1;
1922                 allocreg(cdb,&scratchm,&r1,TYint);
1923 
1924                 genmovreg(cdb,r1,BX);              // MOV R1,EBX
1925                 cdb.genc1(0x2B,modregxrm(2,r1,4),FLswitch,0);   // SUB R1,disp[reg*4][EBX]
1926                 cdb.last().IEV1.Vswitch = b;
1927                 cdb.last().Isib = modregrm(2,reg,BX);
1928                 cdb.gen2(0xFF,modregrmx(3,4,r1));               // JMP R1
1929             }
1930             else
1931             {
1932                 cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);     // JMP disp[idxreg*4]
1933                 cdb.last().IEV1.Vswitch = b;
1934                 cdb.last().Isib = modregrm(2,reg,5);
1935             }
1936         }
1937 }
1938         }
1939         else if (I16)
1940         {
1941             cdb.gen2(0xD1,modregrm(3,4,reg));                   // SHL reg,1
1942             uint rm = getaddrmode(retregs) | modregrm(0,4,0);
1943             cdb.genc1(0xFF,rm,FLswitch,0);                  // JMP [CS:]disp[idxreg]
1944             cdb.last().IEV1.Vswitch = b;
1945             cdb.last().Iflags |= csseg ? CFcs : 0;                       // segment override
1946         }
1947         else
1948             assert(0);
1949         cgstate.stackclean--;
1950         return;
1951     }
1952 
1953     /*************************************************************************/
1954     {
1955         /* Scan a table of case values, and jump to corresponding address.
1956          * Since it relies on REPNE SCASW, it has really nothing to recommend it
1957          * over Lifthen for 32 and 64 bit code.
1958          * Note that it has not been tested with MACHOBJ (OSX).
1959          */
1960     Lswitch:
1961         regm_t retregs = mAX;                  // SCASW requires AX
1962         if (dword)
1963             retregs |= mDX;
1964         else if (ncases <= 6 || config.flags4 & CFG4speed)
1965             goto Lifthen;
1966         scodelem(cdb,e,&retregs,0,true);
1967         if (dword && mswsame)
1968         {   /* CMP DX,MSW       */
1969             cdb.genc2(0x81,modregrm(3,7,DX),msw);
1970             genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
1971         }
1972         getregs(cdb,mCX|mDI);
1973 
1974         if (config.flags3 & CFG3pic && config.exe & EX_posix)
1975         {   // Add in GOT
1976             getregs(cdb,mDX);
1977             cdb.genc2(CALL,0,0);        //     CALL L1
1978             cdb.gen1(0x58 + DI);        // L1: POP EDI
1979 
1980                                         //     ADD EDI,_GLOBAL_OFFSET_TABLE_+3
1981             Symbol *gotsym = Obj.getGOTsym();
1982             cdb.gencs(0x81,modregrm(3,0,DI),FLextern,gotsym);
1983             cdb.last().Iflags = CFoff;
1984             cdb.last().IEV2.Voffset = 3;
1985 
1986             makeitextern(gotsym);
1987 
1988             genmovreg(cdb, DX, DI);    // MOV EDX, EDI
1989                                         // ADD EDI,offset of switch table
1990             cdb.gencs(0x81,modregrm(3,0,DI),FLswitch,null);
1991             cdb.last().IEV2.Vswitch = b;
1992         }
1993 
1994         if (!(config.flags3 & CFG3pic))
1995         {
1996                                         // MOV DI,offset of switch table
1997             cdb.gencs(0xC7,modregrm(3,0,DI),FLswitch,null);
1998             cdb.last().IEV2.Vswitch = b;
1999         }
2000         movregconst(cdb,CX,ncases,0);    // MOV CX,ncases
2001 
2002         /* The switch table will be accessed through ES:DI.
2003          * Therefore, load ES with proper segment value.
2004          */
2005         if (config.flags3 & CFG3eseqds)
2006         {
2007             assert(!csseg);
2008             getregs(cdb,mCX);           // allocate CX
2009         }
2010         else
2011         {
2012             getregs(cdb,mES|mCX);       // allocate ES and CX
2013             cdb.gen1(csseg ? 0x0E : 0x1E);      // PUSH CS/DS
2014             cdb.gen1(0x07);                     // POP  ES
2015         }
2016 
2017         targ_size_t disp = (ncases - 1) * _tysize[TYint];  // displacement to jump table
2018         if (dword && !mswsame)
2019         {
2020 
2021             /* Build the following:
2022                 L1:     SCASW
2023                         JNE     L2
2024                         CMP     DX,[CS:]disp[DI]
2025                 L2:     LOOPNE  L1
2026              */
2027 
2028             const int mod = (disp > 127) ? 2 : 1;         // displacement size
2029             code *cloop = genc2(null,0xE0,0,-7 - mod - csseg);   // LOOPNE scasw
2030             cdb.gen1(0xAF);                                      // SCASW
2031             code_orflag(cdb.last(),CFtarg2);                     // target of jump
2032             genjmp(cdb,JNE,FLcode,cast(block *) cloop); // JNE loop
2033                                                                  // CMP DX,[CS:]disp[DI]
2034             cdb.genc1(0x39,modregrm(mod,DX,5),FLconst,disp);
2035             cdb.last().Iflags |= csseg ? CFcs : 0;              // possible seg override
2036             cdb.append(cloop);
2037             disp += ncases * _tysize[TYint];           // skip over msw table
2038         }
2039         else
2040         {
2041             cdb.gen1(0xF2);              // REPNE
2042             cdb.gen1(0xAF);              // SCASW
2043         }
2044         genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
2045         const int mod = (disp > 127) ? 2 : 1;     // 1 or 2 byte displacement
2046         if (csseg)
2047             cdb.gen1(SEGCS);            // table is in code segment
2048 
2049         if (config.flags3 & CFG3pic &&
2050             config.exe & EX_posix)
2051         {                               // ADD EDX,(ncases-1)*2[EDI]
2052             cdb.genc1(0x03,modregrm(mod,DX,7),FLconst,disp);
2053                                         // JMP EDX
2054             cdb.gen2(0xFF,modregrm(3,4,DX));
2055         }
2056 
2057         if (!(config.flags3 & CFG3pic))
2058         {                               // JMP (ncases-1)*2[DI]
2059             cdb.genc1(0xFF,modregrm(mod,4,(I32 ? 7 : 5)),FLconst,disp);
2060             cdb.last().Iflags |= csseg ? CFcs : 0;
2061         }
2062         b.Btablesize = disp + _tysize[TYint] + ncases * tysize(TYnptr);
2063         //assert(b.Bcode);
2064         cgstate.stackclean--;
2065         return;
2066     }
2067 }
2068 
2069 /******************************
2070  * Output data block for a jump table (BCjmptab).
2071  * The 'holes' in the table get filled with the
2072  * default label.
2073  */
2074 
void outjmptab(block *b)
{
    // With JMPJMPTABLE the 32 bit table is a run of JMP instructions placed
    // in the code stream by doswitch(), so there is no data table to emit.
    if (JMPJMPTABLE && I32)
        return;

    // b.Bswitch[0] is the number of cases, the case values follow it
    targ_llong *caseval = b.Bswitch;
    const size_t ncases = cast(size_t)*caseval++;

    /* Determine the inclusive range [lo .. hi] of case values covered by the
     * table. This must be the same computation as the one in doswitch().
     */
    targ_llong hi = MINLL;
    targ_llong lo = MAXLL;
    foreach (size_t i; 0 .. ncases)
    {
        const targ_llong v = caseval[i];
        if (v > hi)
            hi = v;
        if (v < lo)
            lo = v;
    }
    // A small positive minimum is rounded down to 0 (saves the SUB in doswitch())
    if (lo > 0 && lo <= _tysize[TYint])
        lo = 0;
    assert(lo <= hi);

    // Segment the table goes into, and the running offset within that segment
    int jmpseg = objmod.jmpTableSegment(funcsym_p);
    targ_size_t *poffset = &Offset(jmpseg);

    // Pad up to the aligned start of the table
    targ_size_t alignbytes = _align(0,*poffset) - *poffset;
    objmod.lidata(jmpseg,*poffset,alignbytes);
    assert(*poffset == b.Btableoffset);        // should match precomputed value

    // The target configuration is the same for every table entry
    const bool pic     = (config.flags3 & CFG3pic) != 0;
    const bool posix64 = (config.exe & (EX_LINUX64 | EX_FREEBSD64 | EX_OPENBSD64 | EX_DRAGONFLYBSD64 | EX_SOLARIS64)) != 0;
    const bool posix32 = (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) != 0;
    const bool osx     = (config.exe & (EX_OSX | EX_OSX64)) != 0;

    Symbol *gotsym = null;                          // fetched on first use
    const targ_size_t deftarg = b.nthSucc(0).Boffset; // address of default block

    targ_llong u = lo;
    while (1)
    {
        // Holes in the table jump to the default block
        targ_size_t targ = deftarg;
        foreach (size_t n; 0 .. ncases)
        {
            if (caseval[n] == u)
            {
                targ = b.nthSucc(cast(int)(n + 1)).Boffset;
                break;
            }
        }

        if (posix64)
        {
            if (pic)
            {
                // 4 byte entry
                objmod.reftodatseg(jmpseg,*poffset,cast(targ_size_t)(targ + (u - lo) * 4),funcsym_p.Sseg,CFswitch);
                *poffset += 4;
            }
            else
            {
                // 8 byte entry
                objmod.reftodatseg(jmpseg,*poffset,targ,funcsym_p.Sxtrnnum,CFoffset64 | CFswitch);
                *poffset += 8;
            }
        }
        else if (posix32)
        {
            if (pic)
            {
                assert(config.flags & CFGromable);
                // Want a GOTPC fixup to _GLOBAL_OFFSET_TABLE_
                if (!gotsym)
                    gotsym = Obj.getGOTsym();
                objmod.reftoident(jmpseg,*poffset,gotsym,*poffset - targ,CFswitch);
            }
            else
                objmod.reftocodeseg(jmpseg,*poffset,targ);
            *poffset += 4;
        }
        else if (osx || I64)
        {
            // 4 byte entry holding the target relative to the table offset
            // (64 bit) or to the table base (32 bit OSX)
            targ_size_t val;
            if (I64)
                val = targ - b.Btableoffset;
            else
                val = targ - b.Btablebase;
            objmod.write_bytes(SegData[jmpseg],4,&val);
        }
        else
        {
            // near pointer sized reference into the code segment
            objmod.reftocodeseg(jmpseg,*poffset,targ);
            *poffset += tysize(TYnptr);
        }

        // Test for the end before incrementing, so hi == ~0 cannot wrap around
        if (u == hi)
            break;
        u++;
    }
}
2172 
2173 
2174 /******************************
2175  * Output data block for a switch table.
2176  * Two consecutive tables, the first is the case value table, the
2177  * second is the address table.
2178  */
2179 
void outswitab(block *b)
{
    //printf("outswitab()\n");
    // b.Bswitch[0] is the number of cases, the case values follow it
    targ_llong *caseval = b.Bswitch;
    const uint ncases = cast(uint)*caseval++;

    const int seg = objmod.jmpTableSegment(funcsym_p);
    targ_size_t *poffset = &Offset(seg);

    // Emit whatever padding is needed to align the start of the tables
    const targ_size_t start = *poffset;
    const targ_size_t alignbytes = _align(0,*poffset) - *poffset;
    objmod.lidata(seg,*poffset,alignbytes);
    assert(*poffset == start + alignbytes);

    // 'expected' tracks where *poffset should be after each table
    targ_size_t expected = start;

    // Table 1: the case values, each written as sz bytes
    const uint sz = _tysize[TYint];
    assert(SegData[seg].SDseg == seg);
    foreach (uint n; 0 .. ncases)
    {
        //printf("\tcase %d, offset = x%x\n", n, *poffset);
        objmod.write_bytes(SegData[seg],sz,&caseval[n]);
    }
    expected += alignbytes + sz * ncases;
    assert(*poffset == expected);

    // Optional table: most significant words of the case values. It is
    // present exactly when the table size was computed to include it.
    if (b.Btablesize == ncases * (REGSIZE * 2 + tysize(TYnptr)))
    {
        foreach (uint n; 0 .. ncases)
        {
            targ_size_t msw = cast(targ_size_t)MSREG(caseval[n]);
            objmod.write_bytes(SegData[seg],REGSIZE,&msw);
        }
        expected += REGSIZE * ncases;
        assert(*poffset == expected);
    }

    // Last table: addresses of the case blocks. The first successor is
    // stepped over before any entry is written.
    list_t succ = b.Bsucc;
    for (uint left = ncases; left; left--)
    {
        succ = list_next(succ);
        objmod.reftocodeseg(seg,*poffset,list_block(succ).Boffset);
        *poffset += tysize(TYnptr);
    }
    assert(*poffset == expected + ncases * tysize(TYnptr));
}
2227 
2228 /*****************************
2229  * Return a jump opcode relevant to the elem for a JMP true.
2230  */
2231 
int jmpopcode(elem *e)
{
    /* Jump opcodes for integer comparisons, indexed by
     * [0 = signed, 1 = unsigned][1 = compare against zero][op - OPle]
     */
    static immutable ubyte[6][2][2] jops =
    [   /* <=  >   <   >=  ==  !=    <=0 >0  <0  >=0 ==0 !=0    */
       [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JS ,JNS,JE ,JNE] ], /* signed   */
       [ [JBE,JA ,JB ,JAE,JE ,JNE],[JE ,JNE,JB ,JAE,JE ,JNE] ], /* uint */
    ];

    // A parity jump placed in the second byte of the result
    enum
    {
        XP     = (JP  << 8),
        XNP    = (JNP << 8),
    }

    /* Jump opcodes for floating point comparisons, indexed by [0][op - OPle]
     */
    static immutable uint[26][1] jfops =
    /*   le     gt lt     ge  eqeq    ne     unord lg  leg  ule ul uge  */
    [
      [ XNP|JBE,JA,XNP|JB,JAE,XNP|JE, XP|JNE,JP,   JNE,JNP, JBE,JC,XP|JAE,

    /*  ug    ue ngt nge nlt    nle    ord nlg nleg nule nul nuge    nug     nue */
        XP|JA,JE,JBE,JB, XP|JAE,XP|JA, JNP,JE, JP,  JA,  JNC,XNP|JB, XNP|JBE,JNE        ], /* 8087     */
    ];

    assert(e);

    /* Descend to the operand that determines the result: the right side of a
     * comma, or of an assignment whose left side is a leaf. The OTleaf() test
     * is to line up with the case in cdeq() where it is decided whether mPSW
     * is passed on when evaluating E2.
     */
    while (e.Eoper == OPcomma ||
           (e.Eoper == OPeq && OTleaf(e.EV.E1.Eoper)))
    {
        e = e.EV.E2;
    }

    int op = e.Eoper;
    const bool isCse = e.Ecount != e.Ecomsub;
    const tym_t tymx = tybasic(e.Ety);
    const bool needsNanCheck = tyfloating(tymx) && config.inline8087 &&
        (tymx == TYldouble || tymx == TYildouble || tymx == TYcldouble ||
         tymx == TYcdouble || tymx == TYcfloat ||
         (tyxmmreg(tymx) && config.fpxmmregs && isCse) ||
         op == OPind ||
         (OTcall(op) && (regmask(tymx, tybasic(e.EV.E1.Eoper)) & (mST0 | XMMREGS))));

    // comsubs just get the Z bit set, except for floating point values
    // that need a NaN check
    if (isCse)
    {
        if (needsNanCheck)
            return XP|JNE;
        return JNE;
    }

    if (!OTrel(op))                       // not a relational operator
    {
        if (needsNanCheck)
            return XP|JNE;

        // Look through the unsigned widening conversions to the operator below
        if (op == OPu32_64) { e = e.EV.E1; op = e.Eoper; }
        if (op == OPu16_32) { e = e.EV.E1; op = e.Eoper; }
        if (op == OPu8_16) op = e.EV.E1.Eoper;

        const bool isBitTest = (op >= OPbt && op <= OPbts) || op == OPbtst;
        return isBitTest ? JC : JNE;
    }

    // zero: the right operand is a constant for which boolres() is false
    int zero = 0;
    if (e.EV.E2.Eoper == OPconst)
        zero = !boolres(e.EV.E2);

    int i;                                // row selector, also shown in the debug output
    int jp;                               // resulting jump opcode
    const tym_t tym = e.EV.E1.Ety;
    if (tyfloating(tym))
    {
        i = 0;
        if (config.inline8087)
        {
            i = 1;

            // Decide whether the relation must be swapped before the lookup
            bool swap;
            if (rel_exception(op) || config.flags4 & CFG4fastfloat)
            {
                const bool NOSAHF = (I64 || config.fpxmmregs);
                if (zero)
                    swap = NOSAHF;
                else
                    swap = NOSAHF || cmporder87(e.EV.E2);
            }
            else
            {
                swap = !(zero && config.target_cpu < TARGET_80386);
            }
            if (swap)
                op = swaprel(op);
        }
        jp = jfops[0][op - OPle];
    }
    else
    {
        if (tyuns(tym) || tyuns(e.EV.E2.Ety))
            i = 1;
        else if (tyintegral(tym) || typtr(tym))
            i = 0;
        else
        {
            debug
            elem_print(e);
            WRTYxx(tym);
            assert(0);
        }

        jp = jops[i][zero][op - OPle];        /* table starts with OPle       */

        /* Try to rewrite uint comparisons so they rely on just the Carry flag
         */
        const bool rhsIsConst = e.EV.E2.Eoper == OPconst || e.EV.E2.Eoper == OPrelconst;
        if (i == 1 && (jp == JA || jp == JBE) && !rhsIsConst)
        {
            jp = (jp == JA) ? JC : JNC;
        }
    }

    // The low byte must be one of the 0x70..0x7F jump opcodes
    debug
    if ((jp & 0xF0) != 0x70)
    {
        WROP(op);
        printf("i %d zero %d op x%x jp x%x\n",i,zero,op,jp);
    }

    assert((jp & 0xF0) == 0x70);
    return jp;
}
2386 
2387 /**********************************
2388  * Append code to cdb which validates pointer described by
2389  * addressing mode in *pcs. Modify addressing mode in *pcs.
2390  * Params:
2391  *    cdb = append generated code to this
2392  *    pcs = original addressing mode to be updated
2393  *    keepmsk = mask of registers we must not destroy or use
2394  *              if (keepmsk & RMstore), this will be only a store operation
2395  *              into the lvalue
2396  */
2397 
void cod3_ptrchk(ref CodeBuilder cdb,code *pcs,regm_t keepmsk)
{
    reg_t reg;

    // Only 16 and 32 bit code is handled
    assert(!I64);
    if (!I16 && pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
        return;         // not designed to deal with 48 bit far pointers

    ubyte rm = pcs.Irm;
    assert(!(rm & 0x40));       // no disp8 or reg addressing modes

    // If the addressing mode is already a register
    reg = rm & 7;
    if (I16)
    {   static immutable ubyte[8] imode = [ BP,BP,BP,BP,SI,DI,BP,BX ];

        reg = imode[reg];               // convert [SI] to SI, etc.
    }
    regm_t idxregs = mask(reg);
    if ((rm & 0x80 && (pcs.IFL1 != FLoffset || pcs.IEV1.Vuns)) ||
        !(idxregs & ALLREGS)
       )
    {
        // Load the offset into a register, so we can push the address
        regm_t idxregs2 = (I16 ? IDXREGS : ALLREGS) & ~keepmsk; // only these can be index regs
        assert(idxregs2);
        allocreg(cdb,&idxregs2,&reg,TYoffset);

        // From here on the address lives in the newly allocated register, so
        // idxregs must describe that register: it is what setaddrmode() below
        // uses to rewrite *pcs. Leaving it at the old base register would make
        // the rewritten operand 0[oldreg], losing the displacement.
        idxregs = mask(reg);

        // Temporarily turn *pcs into LEA reg,EA, emit it, then restore the
        // opcode and flags of the original instruction
        const opsave = pcs.Iop;
        const uint flagsave = pcs.Iflags;
        pcs.Iop = LEA;
        pcs.Irm |= modregrm(0,reg,0);
        pcs.Iflags &= ~(CFopsize | CFss | CFes | CFcs);        // no prefix bytes needed
        cdb.gen(pcs);                 // LEA reg,EA

        pcs.Iflags = flagsave;
        pcs.Iop = opsave;
    }

    // registers destroyed by the function call
    //used = (mBP | ALLREGS | mES) & ~fregsaved;
    regm_t used = 0;                           // much less code generated this way

    // Save the registers in 'used' that must survive the call. The matching
    // POPs are collected in cs2 in reverse order and appended after the call.
    // (With used == 0 this loop emits nothing.)
    code *cs2 = null;
    regm_t tosave = used & (keepmsk | idxregs);
    for (int i = 0; tosave; i++)
    {
        regm_t mi = mask(i);

        assert(i < REGMAX);
        if (mi & tosave)        /* i = register to save                 */
        {
            int push,pop;

            stackchanged = 1;
            if (i == ES)
            {   push = 0x06;
                pop = 0x07;
            }
            else
            {   push = 0x50 + i;
                pop = push | 8;
            }
            cdb.gen1(push);                     // PUSH i
            cs2 = cat(gen1(null,pop),cs2);      // POP i
            tosave &= ~mi;
        }
    }

    // For 16 bit models, push a far pointer
    if (I16)
    {
        int segreg;             // opcode of the PUSH segreg instruction

        switch (pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
        {   case CFes:  segreg = 0x06;  break;
            case CFss:  segreg = 0x16;  break;
            case CFcs:  segreg = 0x0E;  break;
            case 0:     segreg = 0x1E;  break;  // DS
            default:
                assert(0);
        }

        // See if we should default to SS:
        // (Happens when BP is part of the addressing mode)
        if (segreg == 0x1E && (rm & 0xC0) != 0xC0 &&
            rm & 2 && (rm & 7) != 7)
        {
            segreg = 0x16;
            if (config.wflags & WFssneds)
                pcs.Iflags |= CFss;    // because BP won't be there anymore
        }
        cdb.gen1(segreg);               // PUSH segreg
    }

    cdb.gen1(0x50 + reg);               // PUSH reg

    // Rewrite the addressing mode in *pcs so it is just 0[reg]
    setaddrmode(pcs, idxregs);
    pcs.IFL1 = FLoffset;
    pcs.IEV1.Vuns = 0;

    // Call the validation function
    {
        makeitextern(getRtlsym(RTLSYM_PTRCHK));

        used &= ~(keepmsk | idxregs);           // regs destroyed by this exercise
        getregs(cdb,used);
                                                // CALL __ptrchk
        cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM_PTRCHK));
    }

    // Restore whatever was saved before the call
    cdb.append(cs2);
}
2514 
2515 /***********************************
2516  * Determine if BP can be used as a general purpose register.
2517  * Note parallels between this routine and prolog().
2518  * Returns:
2519  *      0       can't be used, needed for frame
2520  *      mBP     can be used
2521  */
2522 
regm_t cod3_useBP()
{
    // Note that DOSX memory model cannot use EBP as a general purpose
    // register, as SS != DS.
    if (!(config.exe & EX_flat) || config.flags & (CFGalwaysframe | CFGnoebp))
        return 0;

    if (anyiasm)
        return 0;

    const tym_t tyf = funcsym_p.ty();
    if (tyf & mTYnaked)                 // no prolog/epilog for function
        return 0;

    if (funcsym_p.Sfunc.Fflags3 & Ffakeeh)
        return 0;                       // need consistent stack frame

    const tym_t tym = tybasic(tyf);
    if (tym == TYifunc)
        return 0;

    // Only now are the stack offsets estimated, and the global localsize
    // set from that estimate
    stackoffsets(globsym, true);
    localsize = Auto.offset + Fast.offset;                // an estimate only

    // Any one of these conditions means BP stays reserved for the frame
    const bool needsFrame =
        !(config.flags4 & CFG4speed) ||
        config.target_cpu < TARGET_Pentium ||
        tyfarfunc(tym) ||
        config.flags & CFGstack ||
        localsize >= 0x100 ||           // arbitrary value < 0x1000
        (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) ||
        calledFinally ||
        Alloca.size;
    if (needsFrame)
        return 0;

    return mBP;
}
2569 
2570 /*************************************************
2571  * Generate code segment to be used later to restore a cse
2572  */
2573 
bool cse_simple(code *c, elem *e)
{
    regm_t regm;
    reg_t reg;
    const int sz = tysize(e.Ety);

    /* Form 1: (register variable + constant) of register size.
     * Not attempted for 16 bit code.
     */
    const bool isRegPlusConst =
        !I16 &&
        e.Eoper == OPadd &&
        sz == REGSIZE &&
        e.EV.E2.Eoper == OPconst &&
        e.EV.E1.Eoper == OPvar &&
        isregvar(e.EV.E1,&regm,&reg) &&
        !(e.EV.E1.EV.Vsym.Sflags & SFLspill);
    if (isRegPlusConst)
    {
        memset(c,0,(*c).sizeof);

        // LEA with the constant as the displacement off reg
        c.Iop = LEA;
        buildEA(c,reg,-1,1,e.EV.E2.EV.Vuns);
        if (I64 && sz == 8)
            c.Irex |= REX_W;

        return true;
    }

    /* Form 2: indirection through a register variable, result no larger
     * than a register. For 16 bit code the register must be an index register.
     */
    const bool isIndReg =
        e.Eoper == OPind &&
        sz <= REGSIZE &&
        e.EV.E1.Eoper == OPvar &&
        isregvar(e.EV.E1,&regm,&reg) &&
        (I32 || I64 || regm & IDXREGS) &&
        !(e.EV.E1.EV.Vsym.Sflags & SFLspill);
    if (!isIndReg)
        return false;

    memset(c,0,(*c).sizeof);

    // MOV reg,EA with a zero displacement off reg
    c.Iop = (sz == 1) ? 0x8A : 0x8B;
    buildEA(c,reg,-1,1,0);
    if (sz == 2 && I32)
        c.Iflags |= CFopsize;
    else if (I64 && sz == 8)
        c.Irex |= REX_W;

    return true;
}
2625 
/**************************
 * Store `reg` to the common subexpression save area in index `slot`.
 * Emits `MOV slot[BP],reg`, using an XMM store for XMM registers and a
 * segment register move when `reg` is ES.
 * Params:
 *      cdb = where to write code to
 *      tym = type of value that's in `reg`
 *      reg = register to save
 *      slot = index into common subexpression save area
 */
void gen_storecse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot)
{
    if (isXMMreg(reg) && config.fpxmmregs) // watch out for ES
    {
        // Vector types may only use the aligned store if the stack is aligned enough
        const bool aligned = !tyvector(tym) || STACKALIGN >= 16;
        const op = xmmstore(tym, aligned);
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot);
    }
    else
    {
        opcode_t op = STO;          // normal mov
        if (reg == ES)
        {
            op = 0x8C;              // segment reg mov
            reg = 0;                // the real reg number
        }
        cdb.genc1(op,modregxrm(2, reg, BPRM),FLcs,cast(targ_uns)slot);
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
}
2654 
/**************************
 * Compare the value held in common subexpression save area `slot` with 0.
 * Emits `CMP slot[BP],0`.
 * Params:
 *      cdb = where to write code to
 *      tym = type of the saved value (not used by this function)
 *      sz = size in bytes of the saved value
 *      slot = index into common subexpression save area
 */
void gen_testcse(ref CodeBuilder cdb, tym_t tym, uint sz, size_t slot)
{
    // Byte compare uses opcode 0x80, every other size uses 0x81
    const opcode_t op = (sz == 1) ? 0x80 : 0x81;
    cdb.genc(op,modregrm(2,7,BPRM),
                FLcs,cast(targ_uns)slot, FLconst,cast(targ_uns) 0);

    if (sz == 2 && (I64 || I32))
        cdb.last().Iflags |= CFopsize;      // 16 bit operand outside 16 bit code
    if (sz == 8 && I64)
        code_orrex(cdb.last(), REX_W);      // 64 bit operand
}
2665 
/**************************
 * Load `reg` from the common subexpression save area in index `slot`.
 * Emits `MOV reg,slot[BP]`, using an XMM load for XMM registers and a
 * segment register move when `reg` is ES.
 * Params:
 *      cdb = where to write code to
 *      tym = type of the value to load
 *      reg = register to load into
 *      slot = index into common subexpression save area
 */
void gen_loadcse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot)
{
    if (isXMMreg(reg) && config.fpxmmregs)
    {
        // Vector types may only use the aligned load if the stack is aligned enough
        const bool aligned = !tyvector(tym) || STACKALIGN >= 16;
        const op = xmmload(tym, aligned);
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot);
    }
    else
    {
        opcode_t op = LOD;          // normal mov
        if (reg == ES)
        {
            op = 0x8E;              // segment reg mov
            reg = 0;                // the real reg number
        }
        cdb.genc1(op,modregxrm(2,reg,BPRM),FLcs,cast(targ_uns)slot);
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
}
2686 
/***************************************
 * Gen code for OPframeptr.
 * Allocates a result register and emits an ESCAPE | ESCframeptr
 * pseudo-instruction whose Irm field carries that register.
 * Params:
 *      cdb = where to write code to
 *      e = the OPframeptr elem
 *      pretregs = registers the caller wants the result in
 */

void cdframeptr(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    // Honor the caller's register preference if it names any of allregs
    regm_t candidates = *pretregs & allregs;
    if (candidates == 0)
        candidates = allregs;

    reg_t result;
    allocreg(cdb,&candidates, &result, TYint);

    code escape;
    escape.Iop = ESCAPE | ESCframeptr;
    escape.Iflags = 0;
    escape.Irex = 0;
    escape.Irm = cast(ubyte)result;
    cdb.gen(&escape);

    fixresult(cdb,e,candidates,pretregs);
}
2707 
/***************************************
 * Gen code for load of _GLOBAL_OFFSET_TABLE_.
 * This value gets cached in the local variable 'localgot'.
 * Two sequences exist, one for EX_OSX/EX_OSX64 and one for the remaining
 * EX_posix targets; any other target asserts.
 * Params:
 *      cdb = where to write code to
 *      e = the elem being evaluated
 *      pretregs = registers the caller wants the result in
 */

void cdgot(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    const bool osx = (config.exe & (EX_OSX | EX_OSX64)) != 0;
    if (!osx && !(config.exe & EX_posix))
        assert(0);

    // Honor the caller's register preference if it names any of allregs
    regm_t wanted = *pretregs & allregs;
    if (wanted == 0)
        wanted = allregs;
    reg_t reg;
    allocreg(cdb,&wanted, &reg, TYnptr);

    if (osx)
    {
        cdb.genc(CALL,0,0,0,FLgot,0);     //     CALL L1
        cdb.gen1(0x58 + reg);             // L1: POP reg
    }
    else
    {
        cdb.genc2(CALL,0,0);        //     CALL L1
        cdb.gen1(0x58 + reg);       // L1: POP reg

                                    //     ADD reg,_GLOBAL_OFFSET_TABLE_+3
        Symbol *got = Obj.getGOTsym();
        cdb.gencs(0x81,modregrm(3,0,reg),FLextern,got);

        /* The 2:3 offset from L1: is hardcoded, so nothing may be placed
         * between these instructions. CFvolatile keeps the scheduler
         * from rearranging them.
         */
        code *add = cdb.last();
        add.Iflags = CFoff | CFvolatile;
        add.IEV2.Voffset = (reg == AX) ? 2 : 3;

        makeitextern(got);
    }

    fixresult(cdb,e,wanted,pretregs);
}
2757 
/**************************************************
 * Load contents of localgot into EBX.
 * Does nothing unless the target is one of EX_LINUX, EX_FREEBSD, EX_OPENBSD
 * or EX_SOLARIS and CFG3pic is set. If `localgot` is unavailable (null or
 * marked SFLdead) an OPgot elem is evaluated into EBX instead.
 * Params:
 *      cdb = where to write code to
 */

void load_localgot(ref CodeBuilder cdb)
{
    if (!(config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS))) // note: I32 only
        return;
    if (!(config.flags3 & CFG3pic))
        return;

    elem *src;
    if (localgot && !(localgot.Sflags & SFLdead))
    {
        localgot.Sflags &= ~GTregcand;     // because this hack doesn't work with reg allocator
        src = el_var(localgot);
    }
    else
    {
        src = el_long(TYnptr, 0);
        src.Eoper = OPgot;
    }

    regm_t target = mBX;
    codelem(cdb,src,&target,false);
    el_free(src);
}
2787 
/*****************************
 * Write `name` to `p` as a length-prefixed string.
 *
 * Names of up to 255 characters get a one byte length prefix. Longer names
 * get a 4 byte prefix: 0xFF, 0, then the length stored as a 16 bit value.
 * The terminating 0 of `name` is not copied.
 *
 * Params:
 *      p = destination buffer; must have room for the name plus the prefix
 *      name = 0-terminated name to store
 * Returns:
 *      # of bytes stored
 */


int obj_namestring(char *p,const(char)* name)
{
    enum int ONS_OHD = 4;           // max # of extra bytes added by obj_namestring()
    const size_t namelen = strlen(name);

    if (namelen <= 255)
    {
        // Short form: single length byte followed by the characters
        *p = cast(char)namelen;
        memcpy(p + 1,name,namelen);
        return cast(int)(namelen + 1);
    }

    // Long form: 0xFF, 0, 16 bit length, then the characters
    p[0] = 0xFF;
    p[1] = 0;
    *(cast(short *)p + 1) = cast(short)namelen;
    memcpy(p + 4,name,namelen);
    return cast(int)(namelen + ONS_OHD);
}
2815 
/**************************
 * Generate instruction `op` with a mod=3 ModR/M built by modregxrmx()
 * from the two registers.
 * Params:
 *      cdb = where to write code to
 *      op = opcode
 *      dstreg = second argument to modregxrmx()
 *      srcreg = third argument to modregxrmx()
 */
void genregs(ref CodeBuilder cdb,opcode_t op,uint dstreg,uint srcreg)
{
    const modrm = modregxrmx(3,dstreg,srcreg);
    cdb.gen2(op,modrm);
}
2820 
/**************************
 * Generate `TEST t,t` and mark the instruction with CFpsw.
 * Params:
 *      cdb = where to write code to
 *      t = register to test against itself
 */
void gentstreg(ref CodeBuilder cdb, uint t)
{
    const modrm = modregxrmx(3,t,t);
    cdb.gen2(0x85,modrm);               // TEST t,t
    code_orflag(cdb.last(),CFpsw);
}
2826 
/**************************
 * Generate `PUSH reg`.
 * Params:
 *      cdb = where to write code to
 *      reg = register to push; bit 3 set selects the REX_B form
 */
void genpush(ref CodeBuilder cdb, reg_t reg)
{
    const bool needsRexB = (reg & 8) != 0;
    cdb.gen1(0x50 + (reg & 7));         // low 3 bits go into the opcode
    if (needsRexB)
        code_orrex(cdb.last(), REX_B);
}
2833 
/**************************
 * Generate `POP reg`.
 * Params:
 *      cdb = where to write code to
 *      reg = register to pop into; bit 3 set selects the REX_B form
 */
void genpop(ref CodeBuilder cdb, reg_t reg)
{
    const bool needsRexB = (reg & 8) != 0;
    cdb.gen1(0x58 + (reg & 7));         // low 3 bits go into the opcode
    if (needsRexB)
        code_orrex(cdb.last(), REX_B);
}
2840 
/**************************
 * Generate a MOV to,from register instruction.
 * Smart enough to dump redundant register moves, and segment
 * register moves.
 * This overload builds the code in a fresh CodeBuilder and hands back
 * whatever that builder's finish() returns.
 * Params:
 *      to = destination register
 *      from = source register
 * Returns:
 *      the generated code
 */

code *genmovreg(uint to,uint from)
{
    CodeBuilder builder;
    builder.ctor();
    genmovreg(builder, to, from);
    return builder.finish();
}
2853 
/**************************
 * Generate a MOV to,from register instruction without naming a type.
 * Forwards to the typed overload with TYMAX as the type.
 * Params:
 *      cdb = where to write code to
 *      to = destination register
 *      from = source register
 */
void genmovreg(ref CodeBuilder cdb,uint to,uint from)
{
    const tym_t unspecified = TYMAX;
    genmovreg(cdb, to, from, unspecified);
}
2858 
/**************************
 * Generate a MOV to,from register instruction for a value of type `tym`.
 * Nothing is generated when `to` and `from` are the same register.
 * Handles moves among general purpose registers, the listed XMM registers
 * and ES; any other combination asserts.
 * Params:
 *      cdb = where to write code to
 *      to = destination register
 *      from = source register
 *      tym = type of the value moved; TYMAX is treated as TYsize_t
 */
void genmovreg(ref CodeBuilder cdb, uint to, uint from, tym_t tym)
{
    // Map a register to the representative of its kind (GPR, XMM, SEG)
    static uint regKind(uint reg)
    {
        switch (reg)
        {
        case ES:                   return ES;
        case XMM15:
        case XMM0: .. case XMM7:   return XMM0;
        case AX:   .. case R15:    return AX;
        default:                   return reg;
        }
    }

    // Pack the kinds of destination and source into one value (order kept)
    static uint kindPair(uint dst, uint src)
    {
        return (regKind(dst) << 8) + regKind(src);
    }

    if (to == from)
        return;                     // redundant move

    if (tym == TYMAX)
        tym = TYsize_t;             // avoid register slicing

    switch (kindPair(to, from))
    {
        case kindPair(AX, AX):
            genregs(cdb, 0x89, from, to);    // MOV to,from
            if (I64 && tysize(tym) >= 8)
                code_orrex(cdb.last(), REX_W);
            break;

        case kindPair(XMM0, XMM0):           // MOVD/Q to,from
            genregs(cdb, xmmload(tym), to-XMM0, from-XMM0);
            checkSetVex(cdb.last(), tym);
            break;

        case kindPair(AX, XMM0):             // MOVD/Q to,from
            genregs(cdb, STOD, from-XMM0, to);
            if (I64 && tysize(tym) >= 8)
                code_orrex(cdb.last(), REX_W);
            checkSetVex(cdb.last(), tym);
            break;

        case kindPair(XMM0, AX):             // MOVD/Q to,from
            genregs(cdb, LODD, to-XMM0, from);
            if (I64 && tysize(tym) >= 8)
                code_orrex(cdb.last(),  REX_W);
            checkSetVex(cdb.last(), tym);
            break;

        case kindPair(ES, AX):               // MOV ES,from
            assert(tysize(tym) <= REGSIZE);
            genregs(cdb, 0x8E, 0, from);
            break;

        case kindPair(AX, ES):               // MOV to,ES
            assert(tysize(tym) <= REGSIZE);
            genregs(cdb, 0x8C, 0, to);
            break;

        default:
            debug printf("genmovreg(to = %s, from = %s)\n"
                , regm_str(mask(to)), regm_str(mask(from)));
            assert(0);
    }
}
2924 
/***************************************
 * Generate immediate multiply instruction for r1=r2*imm.
 * Optimize it into LEA's if we can:
 * a multiply by 1 becomes a register move, a multiply by 5 becomes an LEA
 * built with base r2, index r2 and scale 4, and everything else is an IMUL.
 * Params:
 *      cdb = where to write code to
 *      r1 = destination register
 *      r2 = source register
 *      imm = constant multiplier
 */

void genmulimm(ref CodeBuilder cdb,uint r1,uint r2,targ_int imm)
{
    // These optimizations should probably be put into pinholeopt()
    if (imm == 1)
    {
        genmovreg(cdb,r1,r2);
        return;
    }

    if (imm == 5)
    {
        code lea;
        lea.Iop = LEA;
        lea.Iflags = 0;
        lea.Irex = 0;
        buildEA(&lea,r2,r2,4,0);
        lea.orReg(r1);
        cdb.gen(&lea);
        return;
    }

    cdb.genc2(0x69,modregxrmx(3,r1,r2),imm);    // IMUL r1,r2,imm
}
2956 
/******************************
 * Load CX with the value of _AHSHIFT.
 * Only available when built with version SCPP; asserts otherwise.
 * Params:
 *      cdb = where to write code to
 */

void genshift(ref CodeBuilder cdb)
{
    version (SCPP)
    {
        // Set up ahshift to trick ourselves into giving the right fixup,
        // which must be seg-relative, external frame, external target.
        auto ahshift = getRtlsym(RTLSYM_AHSHIFT);
        cdb.gencs(0xC7,modregrm(3,0,CX),FLfunc,ahshift);
        code *mov = cdb.last();
        mov.Iflags |= CFoff;
    }
    else
    {
        assert(0);
    }
}
2973 
/******************************
 * Move constant value into reg.
 * Take advantage of existing values in registers, as recorded in
 * regcon.immed; the record is updated with regimmed_set().
 * If flags & mPSW
 *      set flags based on result
 * Else if flags & 8
 *      do not disturb flags
 * Else
 *      don't care about flags
 * If flags & 1 then byte move
 * If flags & 2 then short move (for I32 and I64)
 * If flags & 4 then don't disturb unused portion of register
 * If flags & 16 then reg is a byte register AL..BH
 * If flags & 64 (0x40) then 64 bit move (I64 only)
 * Params:
 *      cdb = code (if any) generated is appended here
 *      reg = register to load
 *      value = constant to load into `reg`
 *      flags = option bits described above
 */

void movregconst(ref CodeBuilder cdb,reg_t reg,targ_size_t value,regm_t flags)
{
    reg_t r;
    regm_t mreg;

    //printf("movregconst(reg=%s, value= %lld (%llx), flags=%x)\n", regm_str(mask(reg)), value, value, flags);

    // regm is non-zero if reg currently holds a known constant, regv is that constant
    regm_t regm = regcon.immed.mval & mask(reg);
    targ_size_t regv = regcon.immed.value[reg];

    if (flags & 1)      // 8 bits
    {
        value &= 0xFF;
        regm &= BYTEREGS;

        // If we already have the right value in the right register
        if (regm && (regv & 0xFF) == value)
            goto L2;

        if (flags & 16 && reg & 4 &&    // if an H byte register
            regcon.immed.mval & mask(reg & 3) &&
            (((regv = regcon.immed.value[reg & 3]) >> 8) & 0xFF) == value)
            goto L2;

        /* Avoid byte register loads to avoid dependency stalls.
         */
        if ((I32 || I64) &&
            config.target_cpu >= TARGET_PentiumPro && !(flags & 4))
            goto L3;

        // See if another register has the right value
        r = 0;
        for (mreg = (regcon.immed.mval & BYTEREGS); mreg; mreg >>= 1)
        {
            if (mreg & 1)
            {
                if ((regcon.immed.value[r] & 0xFF) == value)
                {
                    genregs(cdb,0x8A,reg,r);          // MOV regL,rL
                    // A REX prefix only exists in 64 bit code, so the I64
                    // test must guard both register checks
                    if (I64 && (reg >= 4 || r >= 4))
                        code_orrex(cdb.last(), REX);
                    goto L2;
                }
                if (!(I64 && reg >= 4) &&
                    r < 4 && ((regcon.immed.value[r] >> 8) & 0xFF) == value)
                {
                    genregs(cdb,0x8A,reg,r | 4);      // MOV regL,rH
                    goto L2;
                }
            }
            r++;
        }

        if (value == 0 && !(flags & 8))
        {
            if (!(flags & 4) &&                 // if we can set the whole register
                !(flags & 16 && reg & 4))       // and reg is not an H register
            {
                genregs(cdb,0x31,reg,reg);      // XOR reg,reg
                regimmed_set(reg,value);
                regv = 0;
            }
            else
                genregs(cdb,0x30,reg,reg);      // XOR regL,regL
            flags &= ~mPSW;                     // flags already set by XOR
        }
        else
        {
            cdb.genc2(0xC6,modregrmx(3,0,reg),value);  // MOV regL,value
            if (reg >= 4 && I64)
            {
                code_orrex(cdb.last(), REX);
            }
        }
    L2:
        if (flags & mPSW)
            genregs(cdb,0x84,reg,reg);            // TEST regL,regL

        if (regm)
            // Set just the 'L' part of the register value
            regimmed_set(reg,(regv & ~cast(targ_size_t)0xFF) | value);
        else if (flags & 16 && reg & 4 && regcon.immed.mval & mask(reg & 3))
            // Set just the 'H' part of the register value
            regimmed_set((reg & 3),(regv & ~cast(targ_size_t)0xFF00) | (value << 8));
        return;
    }
L3:
    if (I16)
        value = cast(targ_short) value;             // sign-extend MSW
    else if (I32)
        value = cast(targ_int) value;

    if (!I16 && flags & 2)                      // load 16 bit value
    {
        value &= 0xFFFF;
        if (value && !(flags & mPSW))
        {
            cdb.genc2(0xC7,modregrmx(3,0,reg),value); // MOV reg,value
            regimmed_set(reg, value);
            return;
        }
    }

    // If we already have the right value in the right register
    if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64))
    {
        if (flags & mPSW)
            gentstreg(cdb,reg);
    }
    else if (flags & 64 && regm && regv == value)
    {   // Look at the full 64 bits
        if (flags & mPSW)
        {
            gentstreg(cdb,reg);
            code_orrex(cdb.last(), REX_W);
        }
    }
    else
    {
        if (flags & mPSW)
        {
            switch (value)
            {
                case 0:
                    genregs(cdb,0x31,reg,reg);
                    break;

                case 1:
                    if (I64)
                        goto L4;
                    genregs(cdb,0x31,reg,reg);
                    goto inc;

                case ~cast(targ_size_t)0:
                    if (I64)
                        goto L4;
                    genregs(cdb,0x31,reg,reg);
                    goto dec;

                default:
                L4:
                    if (flags & 64)
                    {
                        cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64
                        gentstreg(cdb,reg);
                        code_orrex(cdb.last(), REX_W);
                    }
                    else
                    {
                        value &= 0xFFFFFFFF;
                        cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value
                        gentstreg(cdb,reg);
                    }
                    break;
            }
        }
        else
        {
            // Look for single byte conversion
            if (regcon.immed.mval & mAX)
            {
                if (I32)
                {
                    if (reg == AX && value == cast(targ_short) regv)
                    {
                        cdb.gen1(0x98);               // CWDE
                        goto done;
                    }
                    if (reg == DX &&
                        value == (regcon.immed.value[AX] & 0x80000000 ? 0xFFFFFFFF : 0) &&
                        !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
                       )
                    {
                        cdb.gen1(0x99);               // CDQ
                        goto done;
                    }
                }
                else if (I16)
                {
                    if (reg == AX &&
                        cast(targ_short) value == cast(byte) regv)
                    {
                        cdb.gen1(0x98);               // CBW
                        goto done;
                    }

                    if (reg == DX &&
                        cast(targ_short) value == (regcon.immed.value[AX] & 0x8000 ? cast(targ_short) 0xFFFF : cast(targ_short) 0) &&
                        !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium)
                       )
                    {
                        cdb.gen1(0x99);               // CWD
                        goto done;
                    }
                }
            }
            if (value == 0 && !(flags & 8) && config.target_cpu >= TARGET_80486)
            {
                genregs(cdb,0x31,reg,reg);              // XOR reg,reg
                goto done;
            }

            if (!I64 && regm && !(flags & 8))
            {
                if (regv + 1 == value ||
                    // Catch case of (0xFFFF+1 == 0) for 16 bit compiles
                    (I16 && cast(targ_short)(regv + 1) == cast(targ_short)value))
                {
                inc:
                    cdb.gen1(0x40 + reg);     // INC reg
                    goto done;
                }
                if (regv - 1 == value)
                {
                dec:
                    cdb.gen1(0x48 + reg);     // DEC reg
                    goto done;
                }
            }

            // See if another register has the right value
            r = 0;
            for (mreg = regcon.immed.mval; mreg; mreg >>= 1)
            {
                debug
                assert(!I16 || regcon.immed.value[r] == cast(targ_short)regcon.immed.value[r]);

                if (mreg & 1 && regcon.immed.value[r] == value)
                {
                    genmovreg(cdb,reg,r);
                    goto done;
                }
                r++;
            }

            if (value == 0 && !(flags & 8))
            {
                genregs(cdb,0x31,reg,reg);              // XOR reg,reg
            }
            else
            {   // See if we can just load a byte
                if (regm & BYTEREGS &&
                    !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_PentiumPro)
                   )
                {
                    if ((regv & ~cast(targ_size_t)0xFF) == (value & ~cast(targ_size_t)0xFF))
                    {
                        movregconst(cdb,reg,value,(flags & 8) |4|1);  // load regL
                        return;
                    }
                    if (regm & (mAX|mBX|mCX|mDX) &&
                        (regv & ~cast(targ_size_t)0xFF00) == (value & ~cast(targ_size_t)0xFF00) &&
                        !I64)
                    {
                        movregconst(cdb,4|reg,value >> 8,(flags & 8) |4|1|16); // load regH
                        return;
                    }
                }
                if (flags & 64)
                    cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64
                else
                {
                    value &= 0xFFFFFFFF;
                    cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value
                }
            }
        }
    done:
        regimmed_set(reg,value);
    }
}
3263 
/**************************
 * Generate a jump instruction.
 * The low byte of `op` is the jump opcode. Unless CFG4fastfloat is set, the
 * second byte of `op` may request an additional parity jump (the JP and JNP
 * cases come from floating point comparisons).
 * Params:
 *      cdb = where to write code to
 *      op = jump opcode, optionally with a second opcode in bits 8..15
 *      fltarg = FLblock (or FLcode)
 *      targ = target block (or code, when fltarg is FLcode)
 */

void genjmp(ref CodeBuilder cdb,opcode_t op,uint fltarg,block *targ)
{
    code jmp;
    jmp.Iop = op & 0xFF;
    // if not already long branch, assume long branch for op = 0x7x
    jmp.Iflags = (op != JMP && op != 0xE8) ? CFjmp16 : 0;
    jmp.Irex = 0;
    jmp.IFL2 = cast(ubyte)fltarg;       // FLblock (or FLcode)
    jmp.IEV2.Vblock = targ;             // target block (or code)
    if (fltarg == FLcode)
        (cast(code *)targ).Iflags |= CFtarg;

    const second = op & 0xFF00;         // look at second jump opcode

    // Fast floating point, or no second opcode (0), or toggled no jump (1):
    // a single jump is all that is needed
    if (config.flags4 & CFG4fastfloat || second == (0 << 8) || second == (1 << 8))
    {
        cdb.gen(&jmp);
        return;
    }

    if (second == (JP << 8))
    {
        // Follow the jump with a JP to the same target
        cdb.gen(&jmp);
        jmp.Iop = JP;
        cdb.gen(&jmp);
    }
    else if (second == (JNP << 8))
    {
        // Do a JP around the jump instruction
        code *skip = gennop(null);
        genjmp(cdb,JP,FLcode,cast(block *) skip);
        cdb.gen(&jmp);
        cdb.append(skip);
    }
    else
    {
        debug
        printf("jop = x%x\n",op);
        assert(0);
    }
}
3317 
/*********************************************
 * Generate first part of prolog for interrupt function.
 * Emits the register save opcodes, then `MOV BP,SP`, then a stack
 * adjustment for the locals if `localsize` is non-zero.
 * Params:
 *      cdb = where to write code to
 *      tyf = function type; mTYloadds is added to it
 */
void prolog_ifunc(ref CodeBuilder cdb, tym_t* tyf)
{
    // One-byte opcodes, each list terminated by 0
    static immutable ubyte[4] ops2 = [ 0x60,0x1E,0x06,0 ];
    static immutable ubyte[11] ops0 = [ 0x50,0x51,0x52,0x53,
                                    0x54,0x55,0x56,0x57,
                                    0x1E,0x06,0 ];

    // The short list is used from the 80286 on
    immutable(ubyte)[] saves = (config.target_cpu >= TARGET_80286)
        ? ops2[0 .. $ - 1]
        : ops0[0 .. $ - 1];
    foreach (opcode; saves)
        cdb.gen1(opcode);

    genregs(cdb,0x8B,BP,SP);     // MOV BP,SP
    if (localsize)
        cod3_stackadj(cdb, cast(int)localsize);

    *tyf |= mTYloadds;
}
3339 
/*********************************************
 * Generate the part of the prolog that reloads DS and, for interrupt
 * functions, clears the direction flag.
 * Params:
 *      cdb = where to write code to
 *      tyf = function type; DS is reloaded if mTYloadds is set
 *      tym = CLD is emitted when this is TYifunc
 *      pushds = true if DS has already been pushed
 */
void prolog_ifunc2(ref CodeBuilder cdb, tym_t tyf, tym_t tym, bool pushds)
{
    /* Determine if we need to reload DS        */
    const bool reloadDS = (tyf & mTYloadds) != 0;
    if (reloadDS)
    {
        if (!pushds)                           // if not already pushed
            cdb.gen1(0x1E);                    // PUSH DS
        spoff += _tysize[TYint];

        cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0); // MOV  AX,DGROUP
        code *movax = cdb.last();
        movax.IEV2.Vseg = DATA;
        movax.Iflags ^= CFseg | CFoff;         // turn off CFoff, on CFseg

        cdb.gen2(0x8E,modregrm(3,3,AX));       // MOV  DS,AX
        useregs(mAX);
    }

    const bool isInterrupt = (tym == TYifunc);
    if (isInterrupt)
        cdb.gen1(0xFC);                        // CLD
}
3359 
/*********************************************
 * Generate prolog for a far function under the config.wflags options:
 * optionally load AX with a segment value, set up BP, optionally push DS
 * and load DS from AX.
 * Params:
 *      cdb = where to write code to
 *      tyf = function type; mTYloadds may be cleared from it
 *      pushds = set to true if a PUSH DS was generated
 */
void prolog_16bit_windows_farfunc(ref CodeBuilder cdb, tym_t* tyf, bool* pushds)
{
    int wflags = config.wflags;
    if (wflags & WFreduced && !(*tyf & mTYexport))
    {   // reduced prolog/epilog for non-exported functions
        wflags &= ~(WFdgroup | WFds | WFss);
    }

    getregsNoSave(mAX);                     // should not have any value in AX

    // Which segment value, if any, gets loaded into AX
    const int segsource = wflags & (WFdgroup | WFds | WFss);
    if (segsource == WFdgroup)              // MOV  AX,DGROUP
    {
        if (wflags & WFreduced)
            *tyf &= ~mTYloadds;          // remove redundancy
        cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0);
        code *movax = cdb.last();
        movax.IEV2.Vseg = DATA;
        movax.Iflags ^= CFseg | CFoff;     // turn off CFoff, on CFseg
    }
    else if (segsource == WFss || segsource == WFds)
    {
        const int segreg = (segsource == WFss) ? 2      // SS
                                               : 3;     // DS
        cdb.gen2(0x8C,modregrm(3,segreg,AX)); // MOV AX,segreg
        if (wflags & WFds)
            cdb.gen1(0x90);             // NOP
    }
    else if (segsource != 0)
    {
        debug
        printf("config.wflags = x%x\n",config.wflags);
        assert(0);
    }

    if (wflags & WFincbp)
        cdb.gen1(0x40 + BP);              // INC  BP
    cdb.gen1(0x50 + BP);                  // PUSH BP
    genregs(cdb,0x8B,BP,SP); // MOV  BP,SP

    if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
    {
        cdb.gen1(0x1E);                       // PUSH DS
        *pushds = true;
        BPoff = -REGSIZE;
    }
    if (wflags & (WFds | WFss | WFdgroup))
        cdb.gen2(0x8E,modregrm(3,3,AX));      // MOV  DS,AX
}
3417 
/**********************************************
 * Set up frame register.
 * Either generates `PUSH BP; MOV BP,SP` itself and sets `enter` to false,
 * or generates no frame setup and sets `enter` to true so the caller can
 * use an ENTER instruction instead.
 * Params:
 *      cdb        = write generated code here
 *      farfunc    = true if a far function
 *      xlocalsize = amount of local variables; reduced by the size of the
 *                   NT exception context when nteh_prolog() has already
 *                   subtracted that from the stack pointer
 *      enter      = set to true if ENTER instruction can be used, false otherwise
 *      cfa_offset = set to frame pointer's offset from the CFA
 *                   (0 unless DWARF frame information is generated)
 */
void prolog_frame(ref CodeBuilder cdb, bool farfunc, ref uint xlocalsize, out bool enter, out int cfa_offset)
{
    //printf("prolog_frame\n");
    cfa_offset = 0;

    // Disabled alternative frame setup for Win64 (the leading 0 makes it unreachable)
    if (0 && config.exe == EX_WIN64)
    {
        // PUSH RBP
        // LEA RBP,0[RSP]
        cdb. gen1(0x50 + BP);
        cdb.genc1(LEA,(REX_W<<16) | (modregrm(0,4,SP)<<8) | modregrm(2,BP,4),FLconst,0);
        enter = false;
        return;
    }

    if (config.wflags & WFincbp && farfunc)
        cdb.gen1(0x40 + BP);      // INC  BP

    // Any one of these conditions rules out the ENTER instruction
    if (config.target_cpu < TARGET_80286 ||
        config.exe & (EX_posix | EX_WIN64) ||
        !localsize ||
        config.flags & CFGstack ||
        (xlocalsize >= 0x1000 && config.exe & EX_flat) ||
        localsize >= 0x10000 ||
        (NTEXCEPTIONS == 2 &&
         (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))) ||
        (config.target_cpu >= TARGET_80386 &&
         config.flags4 & CFG4speed)
       )
    {
        cdb.gen1(0x50 + BP);      // PUSH BP
        genregs(cdb,0x8B,BP,SP);      // MOV  BP,SP
        if (I64)
            code_orrex(cdb.last(), REX_W);   // MOV RBP,RSP
        if ((config.objfmt & (OBJ_ELF | OBJ_MACH)) && config.fulltypes)
            // Don't reorder instructions, as dwarf CFA relies on it
            code_orflag(cdb.last(), CFvolatile);
static if (NTEXCEPTIONS == 2)
{
        // Same exception handling test as in the condition above
        if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))
        {
            nteh_prolog(cdb);
            int sz = nteh_contextsym_size();
            assert(sz != 0);        // should be 5*4, not 0
            xlocalsize -= sz;      // sz is already subtracted from ESP
                                    // by nteh_prolog()
        }
}
        // Describe the frame setup to the DWARF call frame information
        if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
            config.ehmethod == EHmethod.EH_DWARF)
        {
            int off = 2 * REGSIZE;      // 1 for the return address + 1 for the PUSH EBP
            dwarf_CFA_set_loc(1);           // address after PUSH EBP
            dwarf_CFA_set_reg_offset(SP, off); // CFA is now 8[ESP]
            dwarf_CFA_offset(BP, -off);       // EBP is at 0[ESP]
            dwarf_CFA_set_loc(I64 ? 4 : 3);   // address after MOV EBP,ESP
            /* Oddly, the CFA is not the same as the frame pointer,
             * which is why the offset of BP is set to 8
             */
            dwarf_CFA_set_reg_offset(BP, off);        // CFA is now 0[EBP]
            cfa_offset = off;  // remember the difference between the CFA and the frame pointer
        }
        enter = false;              /* do not use ENTER instruction */
    }
    else
        enter = true;
}
3495 
/**********************************************
 * Enforce stack alignment.
 * Does nothing unless `enforcealign` is set. Otherwise the stack is
 * realigned to STACKALIGN when the bytes already pushed leave it
 * misaligned, or when TARGET_STACKALIGN is smaller than STACKALIGN.
 * Params:
 *      cdb = code builder the alignment code is written to
 */
void prolog_stackalign(ref CodeBuilder cdb)
{
    if (enforcealign)
    {
        // 1 for the return address + 1 for the PUSH EBP
        const pushed = (hasframe ? 2 : 1) * REGSIZE;
        const bool misaligned = (pushed & (STACKALIGN - 1)) != 0;
        if (misaligned || TARGET_STACKALIGN < STACKALIGN)
            cod3_stackalign(cdb, STACKALIGN);
    }
}
3512 
/**********************************************
 * Allocate `xlocalsize` bytes of local stack frame.
 *
 * Depending on the target and flags this emits one of:
 *  - a call to the _chkstk runtime helper (16 bit code, stack checking),
 *  - a loop that lowers the stack pointer 0x1000 bytes at a time and touches
 *    each step, followed by the remainder,
 *  - an ENTER instruction (when `enter` is set),
 *  - a single PUSH (frame of exactly REGSIZE bytes in optimized builds),
 *  - or a plain stack pointer adjustment via cod3_stackadj().
 * Params:
 *      cdb = code builder the instructions are appended to
 *      tyf = function type, selects the register used for the PUSH form
 *      xlocalsize = number of bytes to allocate
 *      enter = use the ENTER instruction to allocate
 *      pushalloc = set to true when the PUSH form was used;
 *                  left untouched otherwise
 */
void prolog_frameadj(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool enter, bool* pushalloc)
{
    // Decide whether the allocation has to probe the stack as it goes
    bool probeStack = false;
    if (!(config.exe & (EX_LINUX | EX_LINUX64)))   // seems that Linux doesn't need to fault in stack pages
    {
        // stack overflow check requested
        const bool overflowCheck = (config.flags & CFGstack) && !(I32 && xlocalsize < 0x1000);
        // flat Windows target with a frame of 0x1000 bytes or more
        const bool largeWinFrame = (config.exe & (EX_windos & EX_flat)) && xlocalsize >= 0x1000;
        probeStack = overflowCheck || largeWinFrame;
    }

    if (!probeStack)
    {
        if (enter)
        {
            // ENTER xlocalsize,0
            cdb.genc(ENTER,0,FLconst,xlocalsize,FLconst,cast(targ_uns) 0);
            assert(!(config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D)); // didn't emit Dwarf data
        }
        else if (xlocalsize == REGSIZE && (config.flags4 & CFG4optimized))
        {
            const uint pushallocreg = (tyf == TYmfunc) ? CX : AX;
            cdb.gen1(0x50 + pushallocreg);    // PUSH AX
            // Mark the push volatile so that an -x[EBP] reference
            // is not moved in front of it.
            code_orflag(cdb.last(),CFvolatile);
            *pushalloc = true;
        }
        else
        {
            cod3_stackadj(cdb, xlocalsize);
        }
        return;
    }

    if (I16)
    {
        // BUG: Won't work if parameter is passed in AX
        movregconst(cdb,AX,xlocalsize,false);   // MOV AX,localsize
        Symbol *chkstk = getRtlsym(RTLSYM_CHKSTK);
        makeitextern(chkstk);
        // CALL _chkstk (far call opcode for large code models)
        cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,chkstk);
        useregs((ALLREGS | mBP | mES) & ~chkstk.Sregsaved);
        return;
    }

    /* Watch out for 64 bit code where EDX is passed as a register parameter,
     * hence R11 is the scratch register there.
     */
    const reg_t scratch = I64 ? R11 : DX;

    /*      MOV     EDX, xlocalsize/0x1000
     *  L1: SUB     ESP, 0x1000
     *      TEST    [ESP],ESP
     *      DEC     EDX
     *      JNE     L1
     *      SUB     ESP, xlocalsize % 0x1000
     *
     * The JNE displacements below are hard coded to reach back to L1, so the
     * instructions between L1 and the JNE must not change size.
     * NOTE(review): the loop is bottom-tested, so this sequence presumes
     * xlocalsize >= 0x1000 - confirm that holds whenever this path is taken.
     */
    movregconst(cdb, scratch, xlocalsize / 0x1000, false);
    cod3_stackadj(cdb, 0x1000);
    code_orflag(cdb.last(), CFtarg2);                       // L1: is a jump target
    cdb.gen2sib(0x85, modregrm(0,SP,4),modregrm(0,4,SP));   // TEST [ESP],ESP
    if (I64)
    {
        cdb.gen2(0xFF, modregrmx(3,1,R11));   // DEC R11D
        cdb.genc2(JNE,0,cast(targ_uns)-15);
    }
    else
    {
        cdb.gen1(0x48 + DX);                  // DEC EDX
        cdb.genc2(JNE,0,cast(targ_uns)-12);
    }
    regimmed_set(scratch,0);                  // scratch is now 0
    cod3_stackadj(cdb, xlocalsize & 0xFFF);
    useregs(mask(scratch));
}
3584 
/**********************************************
 * Allocate the local stack frame, using PUSH instructions for very small
 * frames.
 *
 * A frame of exactly one or two registers' worth is allocated by pushing
 * the `pushallocreg` register (CX for TYmfunc functions, AX otherwise) once
 * or twice; any other size is handed to cod3_stackadj().
 * Params:
 *      cdb = code builder the instructions are appended to
 *      tyf = function type, selects the register that is pushed
 *      xlocalsize = number of bytes to allocate
 *      pushalloc = set to true when PUSH was used for the allocation;
 *                  left untouched otherwise
 */
void prolog_frameadj2(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool* pushalloc)
{
    // Number of PUSHes that make up the frame; 0 means "adjust SP directly"
    int pushes = 0;
    if (xlocalsize == REGSIZE)
        pushes = 1;
    else if (xlocalsize == 2 * REGSIZE)
        pushes = 2;

    if (pushes == 0)
    {
        cod3_stackadj(cdb, xlocalsize);
        return;
    }

    const uint pushallocreg = (tyf == TYmfunc) ? CX : AX;
    foreach (n; 0 .. pushes)
        cdb.gen1(0x50 + pushallocreg);      // PUSH AX
    *pushalloc = true;
}
3602 
/**********************************************
 * Set up the magic parameter used by alloca().
 *
 * Stores the constant `localsize - BPoff` into the frame slot at
 * `Alloca.offset + BPoff` relative to the frame pointer.
 * Params:
 *      cdb = code builder the store is appended to
 */
void prolog_setupalloca(ref CodeBuilder cdb)
{
    //printf("prolog_setupalloca() offset x%x size x%x alignment x%x\n",
        //cast(int)Alloca.offset, cast(int)Alloca.size, cast(int)Alloca.alignment);

    const slotOffset = Alloca.offset + BPoff;   // where the magic value lives, BP relative
    const magicValue = localsize - BPoff;       // the value alloca() expects to find there

    // MOV slotOffset[BP],magicValue
    cdb.genc(0xC7,modregrm(2,0,BPRM),
            FLconst,slotOffset,
            FLconst,magicValue);
    if (I64)
        code_orrex(cdb.last(), REX_W);          // 64 bit wide store
}
3615 
/**************************************
 * Save registers that the function destroys,
 * but that the ABI says should be preserved across
 * function calls.
 *
 * When `pushoffuse` is set the registers are stored into the preallocated
 * save area of the stack frame: XMM registers first (16 bytes each), then
 * the general purpose registers (REGSIZE bytes each). Otherwise they are
 * pushed onto the stack one at a time and `EBPtoESP` and `spoff` are
 * advanced by the number of bytes pushed.
 *
 * Emit Dwarf info for the general purpose register saves.
 * Params:
 *      cdb = append generated instructions to this
 *      topush = mask of registers to push
 *      cfa_offset = offset of frame pointer from CFA
 */

void prolog_saveregs(ref CodeBuilder cdb, regm_t topush, int cfa_offset)
{
    // Register save locations are only recorded when Dwarf debug info or
    // Dwarf exception handling tables are being generated.
    const bool emitDwarf = config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
                           config.ehmethod == EHmethod.EH_DWARF;

    if (pushoffuse)
    {
        // Save to preallocated section in the stack frame

        // Address the save area off EBP when there is a usable frame pointer,
        // otherwise off ESP
        const bool bpRelative = hasframe && !enforcealign;

        int xmmtopush = numbitsset(topush & XMMREGS);   // XMM regs take 16 bytes
        targ_size_t xmmoffset = pushoff + BPoff;
        if (!bpRelative)
            xmmoffset += EBPtoESP;                      // convert to ESP relative
        targ_size_t gpoffset = xmmoffset + xmmtopush * 16;  // GP registers follow the XMM area
        while (topush)
        {
            reg_t reg = findreg(topush);
            topush &= ~mask(reg);
            if (isXMMreg(reg))
            {
                if (bpRelative)
                {
                    // MOVUPD xmmoffset[EBP],xmm
                    cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset);
                }
                else
                {
                    // MOVUPD xmmoffset[ESP],xmm
                    cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset);
                }
                xmmoffset += 16;
            }
            else
            {
                if (bpRelative)
                {
                    // MOV gpoffset[EBP],reg
                    cdb.genc1(0x89,modregxrm(2,reg,BPRM),FLconst,gpoffset);
                }
                else
                {
                    // MOV gpoffset[ESP],reg
                    cdb.genc1(0x89,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset);
                }
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                if (emitDwarf)
                {   // Emit debug_frame data giving location of saved register
                    // The code generated so far is finished and sized so the
                    // address after the save is known, then put back into cdb.
                    code *c = cdb.finish();
                    pinholeopt(c, null);
                    dwarf_CFA_set_loc(calcblksize(c));  // address after save
                    dwarf_CFA_offset(reg, cast(int)(gpoffset - cfa_offset));
                    cdb.reset();
                    cdb.append(c);
                }
                gpoffset += REGSIZE;
            }
        }
    }
    else
    {
        while (topush)                      /* while registers to push      */
        {
            reg_t reg = findreg(topush);
            topush &= ~mask(reg);
            if (isXMMreg(reg))
            {
                // SUB RSP,16
                cod3_stackadj(cdb, 16);
                // MOVUPD 0[RSP],xmm
                cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
                EBPtoESP += 16;
                spoff += 16;
            }
            else
            {
                genpush(cdb, reg);
                EBPtoESP += REGSIZE;
                spoff += REGSIZE;
                if (emitDwarf)
                {   // Emit debug_frame data giving location of saved register
                    // relative to 0[EBP]
                    code *c = cdb.finish();
                    pinholeopt(c, null);
                    dwarf_CFA_set_loc(calcblksize(c));  // address after PUSH reg
                    dwarf_CFA_offset(reg, -EBPtoESP - cfa_offset);
                    cdb.reset();
                    cdb.append(c);
                }
            }
        }
    }
}
3720 
/**************************************
 * Undo prolog_saveregs()
 *
 * When `pushoffuse` is set the registers are reloaded from the preallocated
 * save area of the stack frame, using the same layout prolog_saveregs()
 * wrote (XMM registers first, 16 bytes each, then the general purpose
 * registers). Otherwise they are popped off the stack, highest register
 * number first.
 * Params:
 *      cdb = append generated instructions to this
 *      topop = mask of registers to restore; may only contain XMM registers
 *              and the registers covered by the mask 0xFFFF
 */

private void epilog_restoreregs(ref CodeBuilder cdb, regm_t topop)
{
    debug
    if (topop & ~(XMMREGS | 0xFFFF))
        printf("fregsaved = %s, mfuncreg = %s\n",regm_str(fregsaved),regm_str(mfuncreg));

    assert(!(topop & ~(XMMREGS | 0xFFFF)));
    if (pushoffuse)
    {
        // Restore from preallocated section in the stack frame

        // Address the save area off EBP when there is a usable frame pointer,
        // otherwise off ESP
        const bool bpRelative = hasframe && !enforcealign;

        int xmmtopop = numbitsset(topop & XMMREGS);   // XMM regs take 16 bytes
        targ_size_t xmmoffset = pushoff + BPoff;
        if (!bpRelative)
            xmmoffset += EBPtoESP;                    // convert to ESP relative
        targ_size_t gpoffset = xmmoffset + xmmtopop * 16;   // GP registers follow the XMM area
        while (topop)
        {
            reg_t reg = findreg(topop);
            topop &= ~mask(reg);
            if (isXMMreg(reg))
            {
                if (bpRelative)
                {
                    // MOVUPD xmm,xmmoffset[EBP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset);
                }
                else
                {
                    // MOVUPD xmm,xmmoffset[ESP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset);
                }
                xmmoffset += 16;
            }
            else
            {
                if (bpRelative)
                {
                    // MOV reg,gpoffset[EBP]
                    cdb.genc1(0x8B,modregxrm(2,reg,BPRM),FLconst,gpoffset);
                }
                else
                {
                    // MOV reg,gpoffset[ESP]
                    cdb.genc1(0x8B,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset);
                }
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                gpoffset += REGSIZE;
            }
        }
    }
    else
    {
        /* Walk the register numbers downwards so registers come off the
         * stack in the reverse of the order they were pushed.
         * Start at XMM7 only if there are XMM registers to restore.
         * NOTE(review): the walk presumes every bit in topop lies at or
         * below the starting register - confirm for 32 bit code with XMM
         * registers in topop.
         */
        reg_t reg = I64 ? XMM7 : DI;
        if (!(topop & XMMREGS))
            reg = R15;
        regm_t regm = mask(reg);

        while (topop)
        {   if (topop & regm)
            {
                if (isXMMreg(reg))
                {
                    // MOVUPD xmm,0[RSP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
                    // ADD RSP,16
                    cod3_stackadj(cdb, -16);
                }
                else
                {
                    cdb.gen1(0x58 + (reg & 7));         // POP reg
                    if (reg & 8)
                        code_orrex(cdb.last(), REX_B);  // R8..R15 need the REX.B prefix bit
                }
                topop &= ~regm;
            }
            regm >>= 1;
            reg--;
        }
    }
}
3807 
version (SCPP)
{
/******************************
 * Generate the call to the profiler's trace prolog routine, followed inline
 * by the name of the current function (funcsym_p).
 * Params:
 *      cdb = append generated instructions to this
 *      farfunc = true selects the far variant of the trace routine
 *                (RTLSYM_TRACE_PRO_F), false the near one (RTLSYM_TRACE_PRO_N)
 *      regsaved = set to the Sregsaved mask of the trace routine's symbol
 */
void prolog_trace(ref CodeBuilder cdb, bool farfunc, uint* regsaved)
{
    Symbol *s = getRtlsym(farfunc ? RTLSYM_TRACE_PRO_F : RTLSYM_TRACE_PRO_N);
    makeitextern(s);
    cdb.gencs(I16 ? 0x9A : CALL,0,FLfunc,s);      // CALL _trace
    if (!I16)
        code_orflag(cdb.last(),CFoff | CFselfrel);
    /* Embedding the function name inline after the call works, but it
     * makes disassembling the code annoying.
     */
    static if (ELFOBJ || MACHOBJ)
    {
        // Generate length prefixed name that is recognized by profiler
        size_t len = strlen(funcsym_p.Sident);
        char *buffer = cast(char *)malloc(len + 4);     // room for the longest (4 byte) prefix
        assert(buffer);
        if (len <= 254)
        {
            // Short form: a single length byte, then the name
            buffer[0] = len;
            memcpy(buffer + 1, funcsym_p.Sident, len);
            len++;
        }
        else
        {
            // Long form: 0xFF, 0, then the length as low byte, high byte,
            // then the name
            buffer[0] = 0xFF;
            buffer[1] = 0;
            buffer[2] = len & 0xFF;
            buffer[3] = len >> 8;
            memcpy(buffer + 4, funcsym_p.Sident, len);
            len += 4;
        }
        cdb.genasm(buffer, len);         // append func name
        free(buffer);
    }
    else
    {
        // Use the object module's mangling of the function name
        char [IDMAX+IDOHD+1] name = void;
        size_t len = objmod.mangle(funcsym_p,name.ptr);
        assert(len < name.length);
        cdb.genasm(name.ptr,len);             // append func name
    }
    *regsaved = s.Sregsaved;
}
}
3854 
/******************************
 * Generate special varargs prolog for Posix 64 bit systems.
 *
 * The stores are addressed relative to RBP when the function has a usable
 * frame pointer, and relative to RSP (with the offsets adjusted by EBPtoESP)
 * otherwise.
 * Params:
 *      cdb = sink for generated code
 *      sv = symbol for __va_argsave
 *      namedargs = registers that named parameters (not ... arguments) were passed in.
 */
void prolog_genvarargs(ref CodeBuilder cdb, Symbol* sv, regm_t namedargs)
{
    /* Generate code to move any arguments passed in registers into
     * the stack variable __va_argsave,
     * so we can reference it via pointers through va_arg().
     *   struct __va_argsave_t {
     *     size_t[6] regs;
     *     real[8] fpregs;
     *     uint offset_regs;
     *     uint offset_fpregs;
     *     void* stack_args;
     *     void* reg_args;
     *   }
     * The MOVAPS instructions seg fault if data is not aligned on
     * 16 bytes, so this gives us a nice check to ensure no mistakes.
        MOV     voff+0*8[RBP],RDI
        MOV     voff+1*8[RBP],RSI
        MOV     voff+2*8[RBP],RDX
        MOV     voff+3*8[RBP],RCX
        MOV     voff+4*8[RBP],R8
        MOV     voff+5*8[RBP],R9
        MOVZX   EAX,AL                      // AL = 0..8, # of XMM registers used
        SHL     EAX,2                       // 4 bytes for each MOVAPS
        LEA     R11,offset L2[RIP]
        SUB     R11,RAX
        LEA     RAX,voff+6*8+0x7F[RBP]
        JMP     R11d
        MOVAPS  -0x0F[RAX],XMM7             // only save XMM registers if actually used
        MOVAPS  -0x1F[RAX],XMM6
        MOVAPS  -0x2F[RAX],XMM5
        MOVAPS  -0x3F[RAX],XMM4
        MOVAPS  -0x4F[RAX],XMM3
        MOVAPS  -0x5F[RAX],XMM2
        MOVAPS  -0x6F[RAX],XMM1
        MOVAPS  -0x7F[RAX],XMM0
      L2:
        MOV     1[RAX],offset_regs          // set __va_argsave.offset_regs
        MOV     5[RAX],offset_fpregs        // set __va_argsave.offset_fpregs
        LEA     R11, Para.size+Para.offset[RBP]
        MOV     9[RAX],R11                  // set __va_argsave.stack_args
        SUB     RAX,6*8+0x7F                // point to start of __va_argsave
        MOV     6*8+8*16+4+4+8[RAX],RAX     // set __va_argsave.reg_args
    * RAX and R11 are destroyed.
    */

    /* Save registers into the voff area on the stack
     */
    targ_size_t voff = Auto.size + BPoff + sv.Soffset;  // EBP offset of start of sv
    const int vregnum = 6;
    const uint vsize = vregnum * 8 + 8 * 16;

    static immutable ubyte[vregnum] regs = [ DI,SI,DX,CX,R8,R9 ];

    if (!hasframe || enforcealign)
        voff += EBPtoESP;                       // convert to ESP relative

    for (int i = 0; i < vregnum; i++)
    {
        uint r = regs[i];
        if (!(mask(r) & namedargs))  // unnamed arguments would be the ... ones
        {
            uint ea = (REX_W << 16) | modregxrm(2,r,BPRM);
            if (!hasframe || enforcealign)
                ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,r,4);
            cdb.genc1(0x89,ea,FLconst,voff + i*8);
        }
    }

    genregs(cdb,MOVZXb,AX,AX);                 // MOVZX EAX,AL
    cdb.genc2(0xC1,modregrm(3,4,AX),2);                     // SHL EAX,2
    int raxoff = cast(int)(voff+6*8+0x7F);
    /* L2offset is the number of code bytes between the end of the
     * LEA R11,L2[RIP] and L2. It depends on whether LEA RAX,raxoff[...]
     * is encoded with an 8 bit (0x2A) or a 32 bit (0x2D) displacement.
     * A displacement larger than 0x7F cannot be encoded in 8 bits either,
     * which can happen when the offset is ESP relative.
     */
    uint L2offset = (raxoff < -0x7F || raxoff > 0x7F) ? 0x2D : 0x2A;
    if (!hasframe || enforcealign)
        L2offset += 1;                                      // +1 for sib byte
    // LEA R11,offset L2[RIP]
    cdb.genc1(LEA,(REX_W << 16) | modregxrm(0,R11,5),FLconst,L2offset);
    genregs(cdb,0x29,AX,R11);                  // SUB R11,RAX
    code_orrex(cdb.last(), REX_W);
    // LEA RAX,voff+vsize-6*8-16+0x7F[RBP]
    uint ea = (REX_W << 16) | modregrm(2,AX,BPRM);
    if (!hasframe || enforcealign)
        // add sib byte for [RSP] addressing
        ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,AX,4);
    cdb.genc1(LEA,ea,FLconst,raxoff);
    cdb.gen2(0xFF,modregrmx(3,4,R11));                      // JMP R11d
    for (int i = 0; i < 8; i++)
    {
        // MOVAPS -15-16*i[RAX],XMM7-i
        cdb.genc1(0x0F29,modregrm(0,XMM7-i,0),FLconst,-15-16*i);
    }

    /* Compute offset_regs and offset_fpregs
     */
    uint offset_regs = 0;
    uint offset_fpregs = vregnum * 8;
    for (int i = AX; i <= XMM7; i++)
    {
        regm_t m = mask(i);
        if (m & namedargs)
        {
            if (m & (mDI|mSI|mDX|mCX|mR8|mR9))
                offset_regs += 8;
            else if (m & XMMREGS)
                offset_fpregs += 16;
            namedargs &= ~m;
            if (!namedargs)
                break;
        }
    }
    // MOV 1[RAX],offset_regs
    cdb.genc(0xC7,modregrm(2,0,AX),FLconst,1,FLconst,offset_regs);

    // MOV 5[RAX],offset_fpregs
    cdb.genc(0xC7,modregrm(2,0,AX),FLconst,5,FLconst,offset_fpregs);

    // LEA R11, Para.size+Para.offset[RBP]
    Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1);   // round up to a multiple of REGSIZE
    targ_size_t stackargs = Para.size + Para.offset;
    ea = modregxrm(2,R11,BPRM);
    if (!hasframe)
    {
        /* No frame pointer: address the parameters off RSP instead.
         * The destination must still be R11, as that is what gets stored
         * into stack_args below, and the offset has to be converted from
         * EBP relative to ESP relative.
         */
        ea = (modregrm(0,4,SP) << 8) | modregxrm(2,R11,4);
        stackargs += EBPtoESP;
    }
    cdb.genc1(LEA,(REX_W << 16) | ea,FLconst,stackargs);

    // MOV 9[RAX],R11
    cdb.genc1(0x89,(REX_W << 16) | modregxrm(2,R11,AX),FLconst,9);

    // SUB RAX,6*8+0x7F             // point to start of __va_argsave
    cdb.genc2(0x2D,0,6*8+0x7F);
    code_orrex(cdb.last(), REX_W);

    // MOV 6*8+8*16+4+4+8[RAX],RAX  // set __va_argsave.reg_args
    cdb.genc1(0x89,(REX_W << 16) | modregrm(2,AX,AX),FLconst,6*8+8*16+4+4+8);

    // The hard coded L2offset and the 4 byte stride of the computed jump
    // are in terms of the instruction sizes after pinholeopt() has run.
    pinholeopt(cdb.peek(), null);
    useregs(mAX|mR11);
}
3997 
/******************************
 * Placeholder for the Win64 varargs prolog.
 *
 * This function generates no code; its body only documents the scheme.
 * The register-to-stack copies described below are emitted by
 * prolog_loadparams() for variadic functions when config.exe == EX_WIN64.
 * Params:
 *      cdb = sink for generated code (unused)
 */
void prolog_gen_win64_varargs(ref CodeBuilder cdb)
{
    /* The Microsoft scheme.
     * http://msdn.microsoft.com/en-US/library/dd2wa36c(v=vs.80)
     * Copy registers onto stack.
         mov     8[RSP],RCX
         mov     010h[RSP],RDX
         mov     018h[RSP],R8
         mov     020h[RSP],R9
     */
}
4009 
/************************************
 * Generate the prolog code that puts the function's parameters where the
 * function body expects to find them.
 *
 * Works through the global symbol table (globsym) in several passes:
 *  1. parameters passed in registers but living on the stack are stored
 *     to their stack locations,
 *  2. for Win64 variadic functions, the parameter registers not handled
 *     by pass 1 are copied to their stack slots,
 *  3. parameters passed in registers and assigned to (possibly different)
 *     registers are moved into their assigned registers,
 *  4. parameters passed on the stack but assigned to registers are loaded
 *     from the stack, when `refparam` is set or the parameter is volatile.
 * Params:
 *      cdb = generated code sink
 *      tyf = what's the type of the function
 *      pushalloc = use PUSH to allocate on the stack rather than subtracting from SP
 *      namedargs = set to the registers that named parameters were passed in
 */
void prolog_loadparams(ref CodeBuilder cdb, tym_t tyf, bool pushalloc, out regm_t namedargs)
{
    //printf("prolog_loadparams()\n");
    // Debug builds only: list the register parameters when debugr is set
    debug
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        if (debugr && (s.Sclass == SCfastpar || s.Sclass == SCshadowreg))
        {
            printf("symbol '%s' is fastpar in register [l %s, m %s]\n", s.Sident.ptr,
                regm_str(mask(s.Spreg)),
                (s.Spreg2 == NOREG ? "NOREG" : regm_str(mask(s.Spreg2))));
            if (s.Sfl == FLreg)
                printf("\tassigned to register %s\n", regm_str(mask(s.Sreglsw)));
        }
    }

    // Register that a PUSH-based frame allocation pushes; see its use below
    uint pushallocreg = (tyf == TYmfunc) ? CX : AX;

    /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were not assigned
     * registers into their stack locations.
     */
    regm_t shadowregm = 0;      // parameter registers that this pass has dealt with
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        if (!((s.Sclass == SCfastpar || s.Sclass == SCshadowreg) && s.Sfl != FLreg))
            continue;
        // Argument is passed in a register

        type *t = s.Stype;      // type stored from the first register
        type *t2 = null;        // type stored from the second register, if any

        tym_t tyb = tybasic(t.Tty);

        // This logic is same as FuncParamRegs_alloc function at src/dmd/backend/cod1.d
        //
        // Find suitable SROA based on the element type
        // (Don't put volatile parameters in registers)
        if (tyb == TYarray && !(t.Tty & mTYvolatile))
        {
            type *targ1;
            argtypes(t, targ1, t2);
            if (targ1)
                t = targ1;
        }

        // If struct just wraps another type
        if (tyb == TYstruct)
        {
            // On windows 64 bits, structs occupy a general purpose register,
            // regardless of the struct size or the number & types of its fields.
            if (config.exe != EX_WIN64)
            {
                type *targ1 = t.Ttag.Sstruct.Sarg1type;
                t2 = t.Ttag.Sstruct.Sarg2type;
                if (targ1)
                    t = targ1;
            }
        }

        if (Symbol_Sisdead(s, anyiasm))
        {
            // Ignore it, as it is never referenced
            continue;
        }

        // Compute where the parameter lives: shadow register parameters are
        // addressed from Para.size, the others from Fast.size + BPoff
        targ_size_t offset = Fast.size + BPoff;
        if (s.Sclass == SCshadowreg)
            offset = Para.size;
        offset += s.Soffset;
        if (!hasframe || (enforcealign && s.Sclass != SCshadowreg))
            offset += EBPtoESP;         // convert to ESP relative

        reg_t preg = s.Spreg;
        foreach (i; 0 .. 2)     // twice, once for each possible parameter register
        {
            shadowregm |= mask(preg);
            opcode_t op = 0x89;                  // MOV x[EBP],preg
            if (isXMMreg(preg))
                op = xmmstore((t.Tty & TYarray) && t.Tnext ? t.Tnext.Tty : t.Tty);
            // Skip the store if the frame was allocated by PUSHing this very
            // register (never skipped for shadow register parameters)
            if (!(pushalloc && preg == pushallocreg) || s.Sclass == SCshadowreg)
            {
                if (hasframe && (!enforcealign || s.Sclass == SCshadowreg))
                {
                    // MOV x[EBP],preg
                    cdb.genc1(op,modregxrm(2,preg,BPRM),FLconst,offset);
                    if (isXMMreg(preg))
                    {
                        checkSetVex(cdb.last(), t.Tty);
                    }
                    else
                    {
                        //printf("%s Fast.size = %d, BPoff = %d, Soffset = %d, sz = %d\n",
                        //         s.Sident, (int)Fast.size, (int)BPoff, (int)s.Soffset, (int)sz);
                        if (I64 && sz > 4)
                            code_orrex(cdb.last(), REX_W);
                    }
                }
                else
                {
                    // MOV offset[ESP],preg
                    // BUG: byte size?
                    cdb.genc1(op,
                              (modregrm(0,4,SP) << 8) |
                               modregxrm(2,preg,4),FLconst,offset);
                    if (isXMMreg(preg))
                    {
                        checkSetVex(cdb.last(), t.Tty);
                    }
                    else
                    {
                        if (I64 && sz > 4)
                            cdb.last().Irex |= REX_W;
                    }
                }
            }
            // Move on to the second register of the parameter, if there is one
            preg = s.Spreg2;
            if (preg == NOREG)
                break;
            if (t2)
                t = t2;
            offset += REGSIZE;
        }
    }

    if (config.exe == EX_WIN64 && variadic(funcsym_p.Stype))
    {
        /* The Microsoft scheme.
         * http://msdn.microsoft.com/en-US/library/dd2wa36c(v=vs.80)
         * Copy registers onto stack.
             mov     8[RSP],RCX or XMM0
             mov     010h[RSP],RDX or XMM1
             mov     018h[RSP],R8 or XMM2
             mov     020h[RSP],R9 or XMM3
         */
        static immutable reg_t[4] vregs = [ CX,DX,R8,R9 ];
        for (int i = 0; i < vregs.length; ++i)
        {
            uint preg = vregs[i];
            uint offset = cast(uint)(Para.size + i * REGSIZE);
            // Only slots whose register (general purpose or XMM) was not
            // already handled by the pass above
            if (!(shadowregm & (mask(preg) | mask(XMM0 + i))))
            {
                if (hasframe)
                {
                    // MOV x[EBP],preg
                    cdb.genc1(0x89,
                                     modregxrm(2,preg,BPRM),FLconst, offset);
                    code_orrex(cdb.last(), REX_W);
                }
                else
                {
                    // MOV offset[ESP],preg
                    cdb.genc1(0x89,
                                     (modregrm(0,4,SP) << 8) |
                                     modregxrm(2,preg,4),FLconst,offset + EBPtoESP);
                }
                cdb.last().Irex |= REX_W;
            }
        }
    }

    /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were assigned registers
     * into their assigned registers.
     * Note that we have a big problem if Pa is passed in R1 and assigned to R2,
     * and Pb is passed in R2 but assigned to R1. Detect it and assert.
     */
    regm_t assignregs = 0;      // registers that have been written by this pass so far
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        // Every register parameter contributes to namedargs, whether or not
        // it ended up in a register
        if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
            namedargs |= s.Spregm();

        if (!((s.Sclass == SCfastpar || s.Sclass == SCshadowreg) && s.Sfl == FLreg))
        {
            // Argument is passed in a register
            continue;
        }

        type *t = s.Stype;
        type *t2 = null;
        if (tybasic(t.Tty) == TYstruct && config.exe != EX_WIN64)
        {   type *targ1 = t.Ttag.Sstruct.Sarg1type;
            t2 = t.Ttag.Sstruct.Sarg2type;
            if (targ1)
                t = targ1;
        }

        reg_t preg = s.Spreg;       // register the value arrives in
        reg_t r = s.Sreglsw;        // register the value is assigned to
        for (int i = 0; i < 2; ++i)
        {
            if (preg == NOREG)
                break;
            assert(!(mask(preg) & assignregs));         // not already stepped on
            assignregs |= mask(r);

            // MOV reg,preg
            if (r == preg)
            {
                // already in the right register, nothing to do
            }
            else if (mask(preg) & XMMREGS)
            {
                const op = xmmload(t.Tty);      // MOVSS/D xreg,preg
                uint xreg = r - XMM0;
                cdb.gen2(op,modregxrmx(3,xreg,preg - XMM0));
            }
            else
            {
                //printf("test1 mov %s, %s\n", regstring[r], regstring[preg]);
                genmovreg(cdb,r,preg);
                if (I64 && sz == 8)
                    code_orrex(cdb.last(), REX_W);
            }
            // Second half of the parameter: second incoming register and the
            // most significant assigned register
            preg = s.Spreg2;
            r = s.Sregmsw;
            if (t2)
                t = t2;
        }
    }

    /* For parameters that were passed on the stack, but are enregistered,
     * initialize the registers with the parameter stack values.
     * Do not use assignaddr(), as it will replace the stack reference with
     * the register.
     */
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        if (!((s.Sclass == SCregpar || s.Sclass == SCparameter) &&
            s.Sfl == FLreg &&
            (refparam
                // This variable has been reference by a nested function
                || MARS && s.Stype.Tty & mTYvolatile
                )))
        {
            continue;
        }
        // MOV reg,param[BP]
        //assert(refparam);
        if (mask(s.Sreglsw) & XMMREGS)
        {
            const op = xmmload(s.Stype.Tty);  // MOVSS/D xreg,mem
            uint xreg = s.Sreglsw - XMM0;
            cdb.genc1(op,modregxrm(2,xreg,BPRM),FLconst,Para.size + s.Soffset);
            if (!hasframe)
            {   // Convert to ESP relative address rather than EBP
                code *c = cdb.last();
                c.Irm = cast(ubyte)modregxrm(2,xreg,4);
                c.Isib = modregrm(0,4,SP);
                c.IEV1.Vpointer += EBPtoESP;
            }
            continue;
        }

        // Load the least significant (or only) part; 0x8A is the byte form of MOV
        cdb.genc1(sz == 1 ? 0x8A : 0x8B,
            modregxrm(2,s.Sreglsw,BPRM),FLconst,Para.size + s.Soffset);
        code *c = cdb.last();
        if (!I16 && sz == SHORTSIZE)
            c.Iflags |= CFopsize; // operand size
        if (I64 && sz >= REGSIZE)
            c.Irex |= REX_W;
        if (I64 && sz == 1 && s.Sreglsw >= 4)
            c.Irex |= REX;
        if (!hasframe)
        {   // Convert to ESP relative address rather than EBP
            assert(!I16);
            c.Irm = cast(ubyte)modregxrm(2,s.Sreglsw,4);
            c.Isib = modregrm(0,4,SP);
            c.IEV1.Vpointer += EBPtoESP;
        }
        if (sz > REGSIZE)
        {
            // Parameter spans two registers: load the most significant part
            // from the next REGSIZE bytes
            cdb.genc1(0x8B,
                modregxrm(2,s.Sregmsw,BPRM),FLconst,Para.size + s.Soffset + REGSIZE);
            code *cx = cdb.last();
            if (I64)
                cx.Irex |= REX_W;
            if (!hasframe)
            {   // Convert to ESP relative address rather than EBP
                assert(!I16);
                cx.Irm = cast(ubyte)modregxrm(2,s.Sregmsw,4);
                cx.Isib = modregrm(0,4,SP);
                cx.IEV1.Vpointer += EBPtoESP;
            }
        }
    }
}
4312 
/*******************************
 * Generate the function epilog for exit block b and append it to the
 * code already held in b.Bcode.
 *
 * If b is not flagged BFLepilog, only the return instruction is generated.
 * If the function is naked (mTYnaked), nothing is generated and b.Bcode
 * is left untouched.
 * Params:
 *      b = block ending the function; b.Bcode is replaced by the existing
 *          code followed by the generated epilog
 * Output:
 *      retsize         Size of function epilog
 */

void epilog(block *b)
{
    code *cpopds;
    reg_t reg;
    reg_t regx;                      // register that's not a return reg
    regm_t topop,regm;
    targ_size_t xlocalsize = localsize;

    // cdbx collects the epilog instructions; they are appended to b.Bcode at the end
    CodeBuilder cdbx; cdbx.ctor();
    tym_t tyf = funcsym_p.ty();
    tym_t tym = tybasic(tyf);
    bool farfunc = tyfarfunc(tym) != 0;
    if (!(b.Bflags & BFLepilog))       // if no epilog code
        goto Lret;                      // just generate RET
    regx = (b.BC == BCret) ? AX : CX;

    retsize = 0;                        // size is accumulated after Lopt

    if (tyf & mTYnaked)                 // if no prolog/epilog
        return;

    if (tym == TYifunc)
    {
        // Interrupt function. ops2 and ops0 are zero-terminated lists of
        // one-byte opcodes that are emitted in order:
        //   ops2: POP ES, POP DS, POPA, IRET
        //   ops0: POP ES, POP DS, POP DI, POP SI, POP BP, POP BX, POP BX,
        //         POP DX, POP CX, POP AX, IRET
        static immutable ubyte[5] ops2 = [ 0x07,0x1F,0x61,0xCF,0 ];
        static immutable ubyte[12] ops0 = [ 0x07,0x1F,0x5F,0x5E,
                                        0x5D,0x5B,0x5B,0x5A,
                                        0x59,0x58,0xCF,0 ];

        genregs(cdbx,0x8B,SP,BP);              // MOV SP,BP
        // ops2 (which uses POPA) is chosen for 80286 and later targets
        auto p = (config.target_cpu >= TARGET_80286) ? ops2.ptr : ops0.ptr;
        do
            cdbx.gen1(*p);
        while (*++p);
        goto Lopt;
    }

    // Call the trace epilog runtime routine if tracing applies to this function
    if (config.flags & CFGtrace &&
        (!(config.flags4 & CFG4allcomdat) ||
         funcsym_p.Sclass == SCcomdat ||
         funcsym_p.Sclass == SCglobal ||
         (config.flags2 & CFG2comdat && SymInline(funcsym_p))
        )
       )
    {
        Symbol *s = getRtlsym(farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N);
        makeitextern(s);
        cdbx.gencs(I16 ? 0x9A : CALL,0,FLfunc,s);      // CALLF _trace
        if (!I16)
            code_orflag(cdbx.last(),CFoff | CFselfrel);
        // every register the trace routine does not preserve counts as used
        useregs((ALLREGS | mBP | mES) & ~s.Sregsaved);
    }

    if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.exe == EX_WIN32 || MARS))
    {
        nteh_epilog(cdbx);
    }

    // Remember the POP DS so the Windows far-function path below can cancel it
    cpopds = null;
    if (tyf & mTYloadds)
    {
        cdbx.gen1(0x1F);             // POP DS
        cpopds = cdbx.last();
    }

    /* Pop all the general purpose registers saved on the stack
     * by the prolog code. Remember to do them in the reverse
     * order they were pushed.
     */
    topop = fregsaved & ~mfuncreg;
    epilog_restoreregs(cdbx, topop);

    version (MARS)
    {
        if (usednteh & NTEHjmonitor)
        {
            regm_t retregs = 0;
            if (b.BC == BCretexp)
                retregs = regmask(b.Belem.Ety, tym);
            nteh_monitor_epilog(cdbx,retregs);
            xlocalsize += 8;
        }
    }

    if (config.wflags & WFwindows && farfunc)
    {
        int wflags = config.wflags;
        if (wflags & WFreduced && !(tyf & mTYexport))
        {   // reduced prolog/epilog for non-exported functions
            wflags &= ~(WFdgroup | WFds | WFss);
            if (!(wflags & WFsaveds))
                goto L4;                // use the ordinary frame teardown below
        }

        if (localsize)
        {
            cdbx.genc1(LEA,modregrm(1,SP,6),FLconst,cast(targ_uns)-2); /* LEA SP,-2[BP] */
        }
        if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
        {
            if (cpopds)
                cpopds.Iop = NOP;              // don't need previous one
            cdbx.gen1(0x1F);                    // POP DS
        }
        cdbx.gen1(0x58 + BP);                   // POP BP
        if (config.wflags & WFincbp)
            cdbx.gen1(0x48 + BP);               // DEC BP
        assert(hasframe);
    }
    else
    {
        if (needframe || (xlocalsize && hasframe))
        {
        L4:
            assert(hasframe);
            if (xlocalsize || enforcealign)
            {
                if (config.flags2 & CFG2stomp)
                {   /*   MOV  ECX,0xBEAF
                     * L1:
                     *   MOV  [ESP],ECX
                     *   ADD  ESP,4
                     *   CMP  EBP,ESP
                     *   JNE  L1
                     *   POP  EBP
                     */
                    /* Value should be:
                     * 1. != 0 (code checks for null pointers)
                     * 2. be odd (to mess up alignment)
                     * 3. fall in first 64K (likely marked as inaccessible)
                     * 4. be a value that stands out in the debugger
                     */
                    assert(I32 || I64);
                    targ_size_t value = 0x0000BEAF;
                    reg_t regcx = CX;
                    mfuncreg &= ~mask(regcx);
                    uint grex = I64 ? REX_W << 16 : 0;
                    cdbx.genc2(0xC7,grex | modregrmx(3,0,regcx),value);   // MOV regcx,value
                    cdbx.gen2sib(0x89,grex | modregrm(0,regcx,4),modregrm(0,4,SP)); // MOV [ESP],regcx
                    code *c1 = cdbx.last();     // loop head (L1 in the listing above)
                    cdbx.genc2(0x81,grex | modregrm(3,0,SP),REGSIZE);     // ADD ESP,REGSIZE
                    genregs(cdbx,0x39,SP,BP);                             // CMP EBP,ESP
                    if (I64)
                        code_orrex(cdbx.last(),REX_W);
                    genjmp(cdbx,JNE,FLcode,cast(block *)c1);                  // JNE L1
                    // explicitly mark as short jump, needed for correct retsize calculation (Bugzilla 15779)
                    cdbx.last().Iflags &= ~CFjmp16;
                    cdbx.gen1(0x58 + BP);                                 // POP BP
                }
                else if (config.exe == EX_WIN64)
                {   // See http://msdn.microsoft.com/en-us/library/tawsa7cb(v=vs.80).aspx
                    // LEA RSP,0[RBP]
                    cdbx.genc1(LEA,(REX_W<<16)|modregrm(2,SP,BPRM),FLconst,0);
                    cdbx.gen1(0x58 + BP);      // POP RBP
                }
                else if (config.target_cpu >= TARGET_80286 &&
                    !(config.target_cpu >= TARGET_80386 && config.flags4 & CFG4speed)
                   )
                    cdbx.gen1(LEAVE);          // LEAVE
                else if (0 && xlocalsize == REGSIZE && Alloca.size == 0 && I32)
                {   // This doesn't work - I should figure out why
                    mfuncreg &= ~mask(regx);
                    cdbx.gen1(0x58 + regx);    // POP regx
                    cdbx.gen1(0x58 + BP);      // POP BP
                }
                else
                {
                    genregs(cdbx,0x8B,SP,BP);  // MOV SP,BP
                    if (I64)
                        code_orrex(cdbx.last(), REX_W);   // MOV RSP,RBP
                    cdbx.gen1(0x58 + BP);      // POP BP
                }
            }
            else
                cdbx.gen1(0x58 + BP);          // POP BP
            if (config.wflags & WFincbp && farfunc)
                cdbx.gen1(0x48 + BP);              // DEC BP
        }
        else if (xlocalsize == REGSIZE && (!I16 || b.BC == BCret))
        {
            // A single register-sized local area is released by popping into regx
            mfuncreg &= ~mask(regx);
            cdbx.gen1(0x58 + regx);                    // POP regx
        }
        else if (xlocalsize)
            cod3_stackadj(cdbx, cast(int)-xlocalsize);
    }
    if (b.BC == BCret || b.BC == BCretexp)
    {
Lret:
        // 0xCA / 0xC2 are the far / near "RET imm16" opcodes;
        // adding 1 gives the corresponding RET without an immediate
        opcode_t op = tyfarfunc(tym) ? 0xCA : 0xC2;
        if (tym == TYhfunc)
        {
            cdbx.genc2(0xC2,0,4);                       // RET 4
        }
        else if (!typfunc(tym) ||                       // if caller cleans the stack
                 config.exe == EX_WIN64 ||
                 Para.offset == 0)                      // or nothing pushed on the stack anyway
        {
            op++;                                       // to a regular RET
            cdbx.gen1(op);
        }
        else
        {   // Stack is always aligned on register size boundary
            Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1);
            if (Para.offset >= 0x10000)
            {
                // Too large for the 16 bit immediate of RET imm16
                /*
                    POP REG
                    ADD ESP, Para.offset
                    JMP REG
                */
                cdbx.gen1(0x58+regx);
                cdbx.genc2(0x81, modregrm(3,0,SP), Para.offset);
                if (I64)
                    code_orrex(cdbx.last(), REX_W);
                cdbx.genc2(0xFF, modregrm(3,4,regx), 0);
                if (I64)
                    code_orrex(cdbx.last(), REX_W);
            }
            else
                cdbx.genc2(op,0,Para.offset);          // RET Para.offset
        }
    }

Lopt:
    // If last instruction in ce is ADD SP,imm, and first instruction
    // in c sets SP, we can dump the ADD.
    // Here cr is the last instruction of the block's existing code and
    // c is the first instruction of the epilog just generated.
    CodeBuilder cdb; cdb.ctor();
    cdb.append(b.Bcode);
    code *cr = cdb.last();
    code *c = cdbx.peek();
    if (cr && c && !I64)
    {
        if (cr.Iop == 0x81 && cr.Irm == modregrm(3,0,SP))     // if ADD SP,imm
        {
            if (
                c.Iop == LEAVE ||                                // LEAVE
                (c.Iop == 0x8B && c.Irm == modregrm(3,SP,BP)) || // MOV SP,BP
                (c.Iop == LEA && c.Irm == modregrm(1,SP,6))     // LEA SP,-imm[BP]
               )
                cr.Iop = NOP;
            else if (c.Iop == 0x58 + BP)                       // if POP BP
            {
                // turn the ADD SP,imm into MOV SP,BP
                cr.Iop = 0x8B;
                cr.Irm = modregrm(3,SP,BP);                    // MOV SP,BP
            }
        }
        else
        {
static if (0)
{
        // These optimizations don't work if the called function
        // cleans off the stack.
        if (c.Iop == 0xC3 && cr.Iop == CALL)     // CALL near
        {
            cr.Iop = 0xE9;                             // JMP near
            c.Iop = NOP;
        }
        else if (c.Iop == 0xCB && cr.Iop == 0x9A)     // CALL far
        {
            cr.Iop = 0xEA;                             // JMP far
            c.Iop = NOP;
        }
}
        }
    }

    // Optimize the epilog, add its size to retsize, then attach it to the block
    pinholeopt(c, null);
    retsize += calcblksize(c);          // compute size of function epilog
    cdb.append(cdbx);
    b.Bcode = cdb.finish();
}
4590 
/*******************************
 * Compute the offset of SP from BP.
 * Returns:
 *      the sum of spoff and localsize
 */

targ_size_t cod3_spoff()
{
    //printf("spoff = x%x, localsize = x%x\n", (int)spoff, (int)localsize);
    const targ_size_t spFromBp = spoff + localsize;
    return spFromBp;
}
4600 
/*******************************
 * Generate the instruction(s) that move symbol s between its assigned
 * register(s) and the location getlvalue() computes for it.
 * Params:
 *      cdb =   code sink the instructions are appended to
 *      s =     symbol to spill or reload; uses s.Sreglsw (and s.Sregmsw
 *              when the type is larger than REGSIZE)
 *      toreg = true to load the register from the symbol's location,
 *              false to store the register to it
 */
void gen_spill_reg(ref CodeBuilder cdb, Symbol* s, bool toreg)
{
    code ins;
    const regm_t keep = toreg ? RMload : RMstore;

    // A temporary variable elem lets getlvalue() build the addressing mode
    elem* ev = el_var(s);

    const bool inXmm = (mask(s.Sreglsw) & XMMREGS) != 0;
    if (inXmm)
    {
        // XMM register: MOVSS/D xreg,mem for a load, MOVSS/D mem,xreg for a store
        ins.Iop = toreg ? xmmload(s.Stype.Tty) : xmmstore(s.Stype.Tty);
        getlvalue(cdb,&ins,ev,keep);
        ins.orReg(s.Sreglsw - XMM0);
        cdb.gen(&ins);
    }
    else
    {
        const int nbytes = cast(int)type_size(s.Stype);

        // MOV reg,mem (0x8B) or MOV mem,reg (0x89); bit 0 is flipped for byte operands
        ins.Iop = toreg ? 0x8B : 0x89;
        if (nbytes == 1)
            ins.Iop ^= 1;
        getlvalue(cdb,&ins,ev,keep);
        ins.orReg(s.Sreglsw);
        if (I64 && nbytes == 1 && s.Sreglsw >= 4)
            ins.Irex |= REX;

        // A reg,reg form whose two register fields (including the REX_R and
        // REX_B bits) are equal is a MOV of a register to itself: skip it
        const bool lowIsSelfMove =
            (ins.Irm & 0xC0) == 0xC0 &&
            (((ins.Irm >> 3) ^ ins.Irm) & 7) == 0 &&
            (((ins.Irex >> 2) ^ ins.Irex) & 1) == 0;
        if (!lowIsSelfMove)
            cdb.gen(&ins);

        if (nbytes > REGSIZE)
        {
            // Second instruction for the most significant part
            ins.setReg(s.Sregmsw);
            getlvalue_msw(&ins);

            const bool highIsSelfMove =
                (ins.Irm & 0xC0) == 0xC0 &&
                (((ins.Irm >> 3) ^ ins.Irm) & 7) == 0 &&
                (((ins.Irex >> 2) ^ ins.Irex) & 1) == 0;
            if (!highIsSelfMove)
                cdb.gen(&ins);
        }
    }

    el_free(ev);
}
4648 
/****************************
 * Generate code for, and output a thunk.
 *
 * The thunk adjusts the 'this' pointer by d and then jumps either
 * directly to sfunc or, for a virtual call, through the vtbl[] entry at
 * offset i. The generated code is written to sthunk's segment, and
 * sthunk's Soffset, Ssize and Sseg are filled in.
 * Params:
 *      sthunk =  Symbol of thunk
 *      sfunc =   Symbol of thunk's target function
 *      p =       ESP parameter offset to this pointer
 *      thisty =  Type of this pointer
 *      d =       offset to add to 'this' pointer
 *      i =       offset into vtbl[]; when the low 16 bits are all ones
 *                the call is direct (JMP sfunc) rather than virtual
 *      d2 =      offset from 'this' to vptr
 */

void cod3_thunk(Symbol *sthunk,Symbol *sfunc,uint p,tym_t thisty,
        uint d,int i,uint d2)
{
    targ_size_t thunkoffset;

    int seg = sthunk.Sseg;
    cod3_align(seg);

    // Skip over return address
    tym_t thunkty = tybasic(sthunk.ty());
    if (tyfarfunc(thunkty))
        p += I32 ? 8 : tysize(TYfptr);          // far function
    else
        p += tysize(TYnptr);
    if (tybasic(sfunc.ty()) == TYhfunc)
        p += tysize(TYnptr);                    // skip over hidden pointer

    CodeBuilder cdb; cdb.ctor();
    if (!I16)
    {
        /*
           Generate:
            ADD p[ESP],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV EAX, p[ESP]                     EAX = this
            MOV EAX, d2[EAX]                    EAX = this.vptr
            JMP i[EAX]                          jump to virtual function
         */
        // reg is the /digit field of opcode 0x81: 0 selects ADD, 5 selects SUB
        reg_t reg = 0;
        if (cast(int)d < 0)
        {
            d = -d;
            reg = 5;                            // switch from ADD to SUB
        }
        if (thunkty == TYmfunc)
        {                                       // ADD ECX,d
            if (d)
                cdb.genc2(0x81,modregrm(3,reg,CX),d);
        }
        else if (thunkty == TYjfunc || (I64 && thunkty == TYnfunc))
        {                                       // ADD EAX,d
            // Pick the register that is adjusted: AX by default, CX for
            // Win64, otherwise SI or DI for other 64 bit targets
            int rm = AX;
            if (config.exe == EX_WIN64)
                rm = CX;
            else if (I64)
                rm = (thunkty == TYnfunc && (sfunc.Sfunc.Fflags3 & F3hiddenPtr)) ? SI : DI;
            if (d)
                cdb.genc2(0x81,modregrm(3,reg,rm),d);
        }
        else
        {
            // 'this' is on the stack: adjust it in place
            cdb.genc(0x81,modregrm(2,reg,4),
                FLconst,p,                      // to this
                FLconst,d);                     // ADD p[ESP],d
            cdb.last().Isib = modregrm(0,4,SP);
        }
        // cdb may be empty here (d == 0 in the register cases)
        if (I64 && cdb.peek())
            cdb.last().Irex |= REX_W;
    }
    else
    {
        /*
           Generate:
            MOV BX,SP
            ADD [SS:] p[BX],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV BX, p[BX]                       BX = this
            MOV BX, d2[BX]                      BX = this.vptr
            JMP i[BX]                           jump to virtual function
         */

        genregs(cdb,0x89,SP,BX);           // MOV BX,SP
        cdb.genc(0x81,modregrm(2,0,7),
            FLconst,p,                                  // to this
            FLconst,d);                                 // ADD p[BX],d
        if (config.wflags & WFssneds ||
            // If DS needs reloading from SS,
            // then assume SS != DS on thunk entry
            (LARGEDATA && config.wflags & WFss))
            cdb.last().Iflags |= CFss;                 // SS:
    }

    if ((i & 0xFFFF) != 0xFFFF)                 // if virtual call
    {
        // A 'this' pointer wider than a register is treated as a far pointer
        const bool FARTHIS = (tysize(thisty) > REGSIZE);
        const bool FARVPTR = FARTHIS;

        assert(thisty != TYvptr);               // can't handle this case

        if (!I16)
        {
            assert(!FARTHIS && !LARGECODE);
            if (thunkty == TYmfunc)     // if 'this' is in ECX
            {
                // MOV EAX,d2[ECX]
                cdb.genc1(0x8B,modregrm(2,AX,CX),FLconst,d2);
            }
            else if (thunkty == TYjfunc)        // if 'this' is in EAX
            {
                // MOV EAX,d2[EAX]
                cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2);
            }
            else
            {
                // MOV EAX,p[ESP]
                cdb.genc1(0x8B,(modregrm(0,4,SP) << 8) | modregrm(2,AX,4),FLconst,cast(targ_uns) p);
                if (I64)
                    cdb.last().Irex |= REX_W;

                // MOV EAX,d2[EAX]
                cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2);
            }
            // the vptr load is 64 bits wide on 64 bit targets
            if (I64)
                code_orrex(cdb.last(), REX_W);
                                                        // JMP i[EAX]
            cdb.genc1(0xFF,modregrm(2,4,0),FLconst,cast(targ_uns) i);
        }
        else
        {
            // MOV/LES BX,[SS:] p[BX]
            cdb.genc1((FARTHIS ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,cast(targ_uns) p);
            if (config.wflags & WFssneds ||
                // If DS needs reloading from SS,
                // then assume SS != DS on thunk entry
                (LARGEDATA && config.wflags & WFss))
                cdb.last().Iflags |= CFss;             // SS:

            // MOV/LES BX,[ES:]d2[BX]
            cdb.genc1((FARVPTR ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,d2);
            if (FARTHIS)
                cdb.last().Iflags |= CFes;             // ES:

                                                        // JMP i[BX]
            cdb.genc1(0xFF,modregrm(2,(LARGECODE ? 5 : 4),7),FLconst,cast(targ_uns) i);
            if (FARVPTR)
                cdb.last().Iflags |= CFes;             // ES:
        }
    }
    else
    {
static if (0)
{
        localgot = null;                // no local variables
        code *c1 = load_localgot();
        if (c1)
        {
            assignaddrc(c1);
            cdb.append(c1);
        }
}
        cdb.gencs((LARGECODE ? 0xEA : 0xE9),0,FLfunc,sfunc); // JMP sfunc
        cdb.last().Iflags |= LARGECODE ? (CFseg | CFoff) : (CFselfrel | CFoff);
    }

    // Emit the thunk and record where it landed and how big it is
    thunkoffset = Offset(seg);
    code *c = cdb.finish();
    pinholeopt(c,null);
    codout(seg,c);
    code_free(c);

    sthunk.Soffset = thunkoffset;
    sthunk.Ssize = Offset(seg) - thunkoffset; // size of thunk
    sthunk.Sseg = seg;
    // Posix targets and MS-COFF get a public definition for the thunk
    if (config.exe & EX_posix ||
       config.objfmt == OBJ_MSCOFF)
    {
        objmod.pubdef(seg,sthunk,sthunk.Soffset);
    }
    searchfixlist(sthunk);              // resolve forward refs
}
4835 
/*****************************
 * Assume symbol s is extern.
 *
 * If s.Sxtrnnum is still 0, the symbol's storage class is set to
 * SCextern and it is passed to objmod.external(); otherwise nothing
 * is done.
 * Params:
 *      s = symbol to treat as external
 */

void makeitextern(Symbol *s)
{
    // Sxtrnnum already nonzero: leave the symbol alone
    if (s.Sxtrnnum != 0)
        return;

    s.Sclass = SCextern;
    /*printf("makeitextern(x%x)\n",s);*/
    objmod.external(s);
}
4849 
4850 
/*******************************
 * Replace JMPs and long-form conditional jumps in the code of block bl
 * with their short forms wherever possible, and delete jumps whose
 * target is the next instruction.
 * The target of an FLcode jump is looked for forward from the jump
 * first and, if not found, backward from the start of the block.
 * BFLjmpoptdone is set to true if nothing more can be done
 * with this block.
 * Params:
 *      bl =   block whose code (bl.Bcode) is scanned and modified
 *      flag = !=0 means don't have correct Boffsets yet
 * Returns:
 *      number of bytes saved
 */

int branch(block *bl,int flag)
{
    int bytesaved;
    code* c,cn,ct;          // current instruction, its successor, branch target
    targ_size_t offset,disp;
    targ_size_t csize;

    if (!flag)
        bl.Bflags |= BFLjmpoptdone;      // assume this will be all
    c = bl.Bcode;
    if (!c)
        return 0;
    bytesaved = 0;
    offset = bl.Boffset;                 /* offset of start of block     */
    while (1)
    {
        ubyte op;

        csize = calccodsize(c);
        cn = code_next(c);
        op = cast(ubyte)c.Iop;
        // Candidates: a Jcc (0x70..0x7F) still flagged CFjmp16, or a JMP
        // that is not flagged CFjmp5
        if ((op & ~0x0F) == 0x70 && c.Iflags & CFjmp16 ||
            (op == JMP && !(c.Iflags & CFjmp5)))
        {
          L1:
            // Compute disp, the distance from the end of this jump to its target
            switch (c.IFL2)
            {
                case FLblock:
                    if (flag)           // no offsets yet, don't optimize
                        goto L3;
                    disp = c.IEV2.Vblock.Boffset - offset - csize;

                    /* If this is a forward branch, and there is an aligned
                     * block intervening, it is possible that shrinking
                     * the jump instruction will cause it to be out of
                     * range of the target. This happens if the alignment
                     * prevents the target block from moving correspondingly
                     * closer.
                     */
                    if (disp >= 0x7F-4 && c.IEV2.Vblock.Boffset > offset)
                    {   /* Look for intervening alignment
                         */
                        for (block *b = bl.Bnext; b; b = b.Bnext)
                        {
                            if (b.Balign)
                            {
                                bl.Bflags &= ~BFLjmpoptdone;   // some JMPs left
                                goto L3;
                            }
                            if (b == c.IEV2.Vblock)
                                break;
                        }
                    }

                    break;

                case FLcode:
                {
                    code *cr;

                    disp = 0;

                    ct = c.IEV2.Vcode;         /* target of branch     */
                    assert(ct.Iflags & (CFtarg | CFtarg2));
                    // Sum the sizes of the instructions between the jump and its target
                    for (cr = cn; cr; cr = code_next(cr))
                    {
                        if (cr == ct)
                            break;
                        disp += calccodsize(cr);
                    }

                    if (!cr)
                    {   // Didn't find it in forward search. Try backwards jump
                        // s becomes 1 once the target is reached; from there on
                        // sizes are summed up to and including the jump itself
                        int s = 0;
                        disp = 0;
                        for (cr = bl.Bcode; cr != cn; cr = code_next(cr))
                        {
                            assert(cr != null); // must have found it
                            if (cr == ct)
                                s = 1;
                            if (s)
                                disp += calccodsize(cr);
                        }
                    }

                    if (config.flags4 & CFG4optimized && !flag)
                    {
                        /* Propagate branch forward past junk   */
                        while (1)
                        {
                            if (ct.Iop == NOP ||
                                ct.Iop == (ESCAPE | ESClinnum))
                            {
                                ct = code_next(ct);
                                if (!ct)
                                    goto L2;
                            }
                            else
                            {
                                c.IEV2.Vcode = ct;
                                ct.Iflags |= CFtarg;
                                break;
                            }
                        }

                        /* And eliminate jmps to jmps   */
                        if ((op == ct.Iop || ct.Iop == JMP) &&
                            (op == JMP || c.Iflags & CFjmp16))
                        {
                            // Adopt the target's own target and re-evaluate
                            c.IFL2 = ct.IFL2;
                            c.IEV2.Vcode = ct.IEV2.Vcode;
                            /*printf("eliminating branch\n");*/
                            goto L1;
                        }
                     L2:
                        { }
                    }
                }
                    break;

                default:
                    goto L3;
            }

            if (disp == 0)                      // bra to next instruction
            {
                bytesaved += csize;
                c.Iop = NOP;                   // del branch instruction
                c.IEV2.Vcode = null;
                c = cn;
                if (!c)
                    break;
                continue;
            }
            // Both disp and disp - 2 must survive a round trip through a signed byte
            else if (cast(targ_size_t)cast(targ_schar)(disp - 2) == (disp - 2) &&
                     cast(targ_size_t)cast(targ_schar)disp == disp)
            {
                if (op == JMP)
                {
                    c.Iop = JMPS;              // JMP SHORT
                    bytesaved += I16 ? 1 : 3;
                }
                else                            // else Jcond
                {
                    c.Iflags &= ~CFjmp16;      // a branch is ok
                    bytesaved += I16 ? 3 : 4;

                    // Replace a cond jump around a call to a function that
                    // never returns with a cond jump to that function.
                    if (config.flags4 & CFG4optimized &&
                        config.target_cpu >= TARGET_80386 &&
                        disp == (I16 ? 3 : 5) &&
                        cn &&
                        cn.Iop == CALL &&
                        cn.IFL2 == FLfunc &&
                        cn.IEV2.Vsym.Sflags & SFLexit &&
                        !(cn.Iflags & (CFtarg | CFtarg2))
                       )
                    {
                        // The CALL becomes a 0x0F8x conditional jump whose
                        // condition is the inverse (low bit flipped) of c's
                        cn.Iop = 0x0F00 | ((c.Iop & 0x0F) ^ 0x81);
                        c.Iop = NOP;
                        c.IEV2.Vcode = null;
                        bytesaved++;

                        // If nobody else points to ct, we can remove the CFtarg
                        if (flag && ct)
                        {
                            code *cx;
                            for (cx = bl.Bcode; 1; cx = code_next(cx))
                            {
                                if (!cx)
                                {
                                    // reached the end without finding another reference
                                    ct.Iflags &= ~CFtarg;
                                    break;
                                }
                                if (cx.IEV2.Vcode == ct)
                                    break;
                            }
                        }
                    }
                }
                csize = calccodsize(c);         // size changed, recompute
            }
            else
                bl.Bflags &= ~BFLjmpoptdone;   // some JMPs left
        }
L3:
        // Advance to the next instruction, or stop at the end of the block
        if (cn)
        {
            offset += csize;
            c = cn;
        }
        else
            break;
    }
    //printf("bytesaved = x%x\n",bytesaved);
    return bytesaved;
}
5061 
5062 
5063 /************************************************
5064  * Adjust all Soffset's of stack variables so they
5065  * are all relative to the frame pointer.
5066  */
5067 
5068 version (MARS)
5069 {
void cod3_adjSymOffsets()
{
    //printf("cod3_adjSymOffsets()\n");
    for (SYMIDX idx = 0; idx < globsym.length; ++idx)
    {
        //printf("\tglobsym[%d] = %p\n",idx,globsym[idx]);
        Symbol *sym = globsym[idx];
        const sc = sym.Sclass;

        if (sc == SCparameter || sc == SCregpar || sc == SCshadowreg)
        {
            // Parameters are biased by Para.size
            //printf("s = '%s', Soffset = x%x, Para.size = x%x, EBPtoESP = x%x\n", sym.Sident, sym.Soffset, Para.size, EBPtoESP);
            sym.Soffset += Para.size;
            if (0 && !(funcsym_p.Sfunc.Fflags3 & Fmember))      // disabled
            {
                if (!hasframe)
                    sym.Soffset += EBPtoESP;
                if (funcsym_p.Sfunc.Fflags3 & Fnested)
                    sym.Soffset += REGSIZE;
            }
        }
        else if (sc == SCfastpar)
        {
            // Fast parameters are biased by Fast.size + BPoff
            //printf("\tfastpar %s %p Soffset %x Fast.size %x BPoff %x\n", sym.Sident, sym, (int)sym.Soffset, (int)Fast.size, (int)BPoff);
            sym.Soffset += Fast.size + BPoff;
        }
        else if (sc == SCauto || sc == SCregister)
        {
            // Autos that live in the fast area use the fast bias,
            // all others are biased by Auto.size + BPoff
            if (sym.Sfl == FLfast)
                sym.Soffset += Fast.size + BPoff;
            else
            {
                //printf("s = '%s', Soffset = x%x, Auto.size = x%x, BPoff = x%x EBPtoESP = x%x\n", sym.Sident, (int)sym.Soffset, (int)Auto.size, (int)BPoff, (int)EBPtoESP);
                sym.Soffset += Auto.size + BPoff;
            }
        }
        else if (sc == SCbprel)
        {
            // offset is left as it is
        }
        else
            continue;           // any other storage class is not touched

        static if (0)
        {
            if (!hasframe)
                sym.Soffset += EBPtoESP;
        }
    }
}
5124 
5125 }
5126 
/*******************************
 * Take symbol info in union ev and replace it with a real address
 * in Vpointer, for all the code in block bl.
 *
 * The work is done by assignaddrc() on bl.Bcode. EBPtoESP and hasframe
 * are saved before and restored after the call; for a block flagged
 * BFLoutsideprolog they are temporarily set to -REGSIZE and 0.
 * Params:
 *      bl = block whose code is processed
 */

void assignaddr(block *bl)
{
    // Saved unconditionally: the globals are put back after the call
    // whether or not they are overridden below
    const int savedHasframe = hasframe;
    const int savedEBPtoESP = EBPtoESP;

    const bool outsideProlog = (bl.Bflags & BFLoutsideprolog) != 0;
    if (outsideProlog)
    {
        hasframe = 0;
        EBPtoESP = -REGSIZE;
    }

    assignaddrc(bl.Bcode);

    EBPtoESP = savedEBPtoESP;
    hasframe = savedHasframe;
}
5146 
/*******************************
 * Walk a code list and replace symbolic operand references with
 * concrete displacements.
 *
 * For each instruction:
 *  - ESCAPE pseudo-ops are rewritten in place:
 *      ESCadjesp   adds IEV1.Vint to EBPtoESP and becomes a NOP
 *      ESCfixesp   becomes LEA ESP,-EBPtoESP[EBP] when there is a frame,
 *                  followed by AND ESP,-STACKALIGN when enforcealign is set
 *      ESCframeptr becomes MOV reg,EBP or LEA reg,EBPtoESP[ESP]
 *  - the first operand (IFL1/IEV1) is resolved if the instruction has a
 *    modregrm byte with a displacement-carrying memory operand
 *  - the second operand (IFL2/IEV2) is resolved if the instruction
 *    has one (ins & T)
 * Frame relative operand kinds end up as FLconst with the final
 * displacement in Vpointer; when there is no frame (or alignment is
 * enforced) EBP relative addressing is converted to ESP relative.
 *
 * Note that EBPtoESP is modified by ESCadjesp; assignaddr() saves and
 * restores it around the call.
 * Params:
 *      c = start of code list (may be null)
 */
void assignaddrc(code *c)
{
    int sn;
    Symbol *s;
    ubyte ins,rm;
    targ_size_t soff;
    targ_size_t base;

    // EBPtoESP on entry; FLstack operands are adjusted by the change
    // in EBPtoESP (from ESCadjesp) relative to this value
    base = EBPtoESP;
    for (; c; c = code_next(c))
    {
        debug
        {
        if (0)
        {       printf("assignaddrc()\n");
                code_print(c);
        }
        // Catch a two element cycle in the code list
        if (code_next(c) && code_next(code_next(c)) == c)
            assert(0);
        }

        // Look up the instruction size/operand flags for this opcode
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((c.Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c.Iop >> 8) & 0xFF];
        else if ((c.Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c.Iop & 0xFF];
        else if ((c.Iop & 0xFF) == ESCAPE)
        {
            if (c.Iop == (ESCAPE | ESCadjesp))
            {
                //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,(long)c.IEV1.Vint);
                EBPtoESP += c.IEV1.Vint;
                c.Iop = NOP;
            }
            else if (c.Iop == (ESCAPE | ESCfixesp))
            {
                //printf("fix ESP\n");
                if (hasframe)
                {
                    // LEA ESP,-EBPtoESP[EBP]
                    c.Iop = LEA;
                    if (c.Irm & 8)
                        c.Irex |= REX_R;
                    c.Irm = modregrm(2,SP,BP);
                    c.Iflags = CFoff;
                    c.IFL1 = FLconst;
                    c.IEV1.Vuns = -EBPtoESP;
                    if (enforcealign)
                    {
                        // AND ESP, -STACKALIGN
                        code *cn = code_calloc();
                        cn.Iop = 0x81;
                        cn.Irm = modregrm(3, 4, SP);
                        cn.Iflags = CFoff;
                        cn.IFL2 = FLconst;
                        cn.IEV2.Vsize_t = -STACKALIGN;
                        // The AND must be 64 bits wide in 64 bit mode, or
                        // the upper half of RSP would be cleared. The REX_W
                        // belongs on the new instruction cn; c itself gets
                        // its REX_W at the end of the ESCAPE handling below.
                        if (I64)
                            cn.Irex |= REX_W;
                        // Insert cn right after c; it is visited by the
                        // next iteration as an ordinary instruction
                        cn.next = c.next;
                        c.next = cn;
                    }
                }
            }
            else if (c.Iop == (ESCAPE | ESCframeptr))
            {   // Convert to load of frame pointer
                // c.Irm is the register to use
                if (hasframe && !enforcealign)
                {   // MOV reg,EBP
                    c.Iop = 0x89;
                    if (c.Irm & 8)
                        c.Irex |= REX_B;
                    c.Irm = modregrm(3,BP,c.Irm & 7);
                }
                else
                {   // LEA reg,EBPtoESP[ESP]
                    c.Iop = LEA;
                    if (c.Irm & 8)
                        c.Irex |= REX_R;
                    c.Irm = modregrm(2,c.Irm & 7,4);
                    c.Isib = modregrm(0,4,SP);
                    c.Iflags = CFoff;
                    c.IFL1 = FLconst;
                    c.IEV1.Vuns = EBPtoESP;
                }
            }
            if (I64)
                c.Irex |= REX_W;
            continue;
        }
        else
            ins = inssize[c.Iop & 0xFF];

        /* ---- First operand (IFL1 / IEV1) ---- */
        if (!(ins & M) ||
            ((rm = c.Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c.Iflags))
        {

            // mod == 0 carries a displacement only for rm == 5, or
            // for a SIB byte whose base field is 5
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            // 16 bit addressing: mod == 0 carries a displacement only for rm == 6
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }
        s = c.IEV1.Vsym;
        switch (c.IFL1)
        {
            case FLdata:
                if (config.objfmt == OBJ_OMF && s.Sclass != SCcomdat && s.Sclass != SCextern)
                {
                    version (MARS)
                    {
                        c.IEV1.Vseg = s.Sseg;
                    }
                    else
                    {
                        c.IEV1.Vseg = DATA;
                    }
                    c.IEV1.Vpointer += s.Soffset;
                    c.IFL1 = FLdatseg;
                }
                else
                    c.IFL1 = FLextern;
                goto do2;

            case FLudata:
                if (config.objfmt == OBJ_OMF)
                {
                    version (MARS)
                    {
                        c.IEV1.Vseg = s.Sseg;
                    }
                    else
                    {
                        c.IEV1.Vseg = UDATA;
                    }
                    c.IEV1.Vpointer += s.Soffset;
                    c.IFL1 = FLdatseg;
                }
                else
                    c.IFL1 = FLextern;
                goto do2;

            case FLtlsdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                    c.IFL1 = FLextern;
                goto do2;

            case FLdatseg:
                //c.IEV1.Vseg = DATA;
                goto do2;

            case FLfardata:
            case FLcsdata:
            case FLpseudo:
                goto do2;

            case FLstack:
                //printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n",
                //s.Soffset,EBPtoESP,base,c.IEV1.Vpointer);
                c.IEV1.Vpointer += s.Soffset + EBPtoESP - base - EEStack.offset;
                break;

            case FLfast:
                soff = Fast.size;
                goto L1;

            case FLreg:
            case FLauto:
                soff = Auto.size;
            L1:
                // soff is the section size to add for the operand's kind
                if (Symbol_Sisdead(s, anyiasm))
                {
                    c.Iop = NOP;               // remove references to it
                    continue;
                }
                if (s.Sfl == FLreg && c.IEV1.Vpointer < 2)
                {
                    // Symbol lives in a register: turn the memory operand
                    // into a register operand
                    reg_t reg = s.Sreglsw;

                    assert(!(s.Sregm & ~mask(reg)));
                    if (c.IEV1.Vpointer == 1)
                    {
                        assert(reg < 4);    /* must be a BYTEREGS   */
                        reg |= 4;           /* convert to high byte reg */
                    }
                    if (reg & 8)
                    {
                        assert(I64);
                        c.Irex |= REX_B;
                        reg &= 7;
                    }
                    c.Irm = (c.Irm & modregrm(0,7,0))
                            | modregrm(3,0,reg);
                    assert(c.Iop != LES && c.Iop != LEA);
                    goto do2;
                }
                else
                {   c.IEV1.Vpointer += s.Soffset + soff + BPoff;
                    if (s.Sflags & SFLunambig)
                        c.Iflags |= CFunambig;
            L2:
                    if (!hasframe || (enforcealign && c.IFL1 != FLpara))
                    {   /* Convert to ESP relative address instead of EBP */
                        assert(!I16);
                        c.IEV1.Vpointer += EBPtoESP;
                        ubyte crm = c.Irm;
                        if ((crm & 7) == 4)              // if SIB byte
                        {
                            assert((c.Isib & 7) == BP);
                            assert((crm & 0xC0) != 0);
                            c.Isib = (c.Isib & ~7) | modregrm(0,0,SP);
                        }
                        else
                        {
                            assert((crm & 7) == 5);
                            c.Irm = (crm & modregrm(0,7,0))
                                    | modregrm(2,0,4);
                            c.Isib = modregrm(0,4,SP);
                        }
                    }
                }
                break;

            case FLpara:
//printf("s = %s, Soffset = %d, Para.size = %d, BPoff = %d, EBPtoESP = %d\n", s.Sident.ptr, s.Soffset, Para.size, BPoff, EBPtoESP);
                soff = Para.size - BPoff;    // cancel out add of BPoff
                goto L1;

            case FLfltreg:
                c.IEV1.Vpointer += Foff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLallocatmp:
                c.IEV1.Vpointer += Alloca.offset + BPoff;
                goto L2;

            case FLfuncarg:
                c.IEV1.Vpointer += cgstate.funcarg.offset + BPoff;
                goto L2;

            case FLbprel:
                c.IEV1.Vpointer += s.Soffset;
                break;

            case FLcs:
                sn = c.IEV1.Vuns;
                if (!CSE.loaded(sn))            // if never loaded
                {
                    c.Iop = NOP;
                    continue;
                }
                c.IEV1.Vpointer = CSE.offset(sn) + CSoff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLregsave:
                sn = c.IEV1.Vuns;
                c.IEV1.Vpointer = sn + regsave.off + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLndp:
                version (MARS)
                {
                    assert(c.IEV1.Vuns < global87.save.length);
                }
                c.IEV1.Vpointer = c.IEV1.Vuns * tysize(TYldouble) + NDPoff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLoffset:
                break;

            case FLlocalsize:
                c.IEV1.Vpointer += localsize;
                break;

            case FLconst:
            default:
                goto do2;
        }
        // Cases that 'break' out of the switch are fully resolved
        c.IFL1 = FLconst;
    do2:
        /* ---- Second operand (IFL2 / IEV2) ---- */
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T)) goto done;              /* if no second operand */
        s = c.IEV2.Vsym;
        switch (c.IFL2)
        {
            case FLdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                else
                {
                    if (s.Sclass == SCcomdat)
                    {   c.IFL2 = FLextern;
                        goto do2;
                    }
                    c.IEV2.Vseg = MARS ? s.Sseg : DATA;
                    c.IEV2.Vpointer += s.Soffset;
                    c.IFL2 = FLdatseg;
                    goto done;
                }

            case FLudata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                else
                {
                    c.IEV2.Vseg = MARS ? s.Sseg : UDATA;
                    c.IEV2.Vpointer += s.Soffset;
                    c.IFL2 = FLdatseg;
                    goto done;
                }

            case FLtlsdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                goto done;

            case FLdatseg:
                //c.IEV2.Vseg = DATA;
                goto done;

            case FLcsdata:
            case FLfardata:
                goto done;

            case FLreg:
            case FLpseudo:
                assert(0);
                /* NOTREACHED */

            case FLfast:
                c.IEV2.Vpointer += s.Soffset + Fast.size + BPoff;
                break;

            case FLauto:
                c.IEV2.Vpointer += s.Soffset + Auto.size + BPoff;
            L3:
                if (!hasframe || (enforcealign && c.IFL2 != FLpara))
                    /* Convert to ESP relative address instead of EBP */
                    c.IEV2.Vpointer += EBPtoESP;
                break;

            case FLpara:
                c.IEV2.Vpointer += s.Soffset + Para.size;
                goto L3;

            case FLfltreg:
                c.IEV2.Vpointer += Foff + BPoff;
                goto L3;

            case FLallocatmp:
                c.IEV2.Vpointer += Alloca.offset + BPoff;
                goto L3;

            case FLfuncarg:
                c.IEV2.Vpointer += cgstate.funcarg.offset + BPoff;
                goto L3;

            case FLbprel:
                c.IEV2.Vpointer += s.Soffset;
                break;

            case FLstack:
                c.IEV2.Vpointer += s.Soffset + EBPtoESP - base;
                break;

            case FLcs:
            case FLndp:
            case FLregsave:
                assert(0);

            case FLconst:
                break;

            case FLlocalsize:
                c.IEV2.Vpointer += localsize;
                break;

            default:
                goto done;
        }
        // Cases that 'break' out of the switch are fully resolved
        c.IFL2 = FLconst;
  done:
        { }
    }
}
5552 
5553 /*******************************
5554  * Return offset from BP of symbol s.
5555  */
5556 
5557 targ_size_t cod3_bpoffset(Symbol *s)
5558 {
5559     targ_size_t offset;
5560 
5561     symbol_debug(s);
5562     offset = s.Soffset;
5563     switch (s.Sfl)
5564     {
5565         case FLpara:
5566             offset += Para.size;
5567             break;
5568 
5569         case FLfast:
5570             offset += Fast.size + BPoff;
5571             break;
5572 
5573         case FLauto:
5574             offset += Auto.size + BPoff;
5575             break;
5576 
5577         default:
5578             WRFL(cast(FL)s.Sfl);
5579             symbol_print(s);
5580             assert(0);
5581     }
5582     assert(hasframe);
5583     return offset;
5584 }
5585 
5586 
5587 /*******************************
5588  * Find shorter versions of the same instructions.
5589  * Does these optimizations:
5590  *      replaces jmps to the next instruction with NOPs
5591  *      sign extension of modregrm displacement
5592  *      sign extension of immediate data (can't do it for OR, AND, XOR
5593  *              as the opcodes are not defined)
5594  *      short versions for AX EA
5595  *      short versions for reg EA
5596  * Code is neither removed nor added.
5597  * Params:
5598  *      b = block for code (or null)
5599  *      c = code list to optimize
5600  */
5601 
5602 void pinholeopt(code *c,block *b)
5603 {
5604     targ_size_t a;
5605     uint mod;
5606     ubyte ins;
5607     int usespace;
5608     int useopsize;
5609     int space;
5610     block *bn;
5611 
5612     debug
5613     {
5614         __gshared int tested; if (!tested) { tested++; pinholeopt_unittest(); }
5615     }
5616 
5617     debug
5618     {
5619         code *cstart = c;
5620         if (debugc)
5621         {
5622             printf("+pinholeopt(%p)\n",c);
5623         }
5624     }
5625 
5626     if (b)
5627     {
5628         bn = b.Bnext;
5629         usespace = (config.flags4 & CFG4space && b.BC != BCasm);
5630         useopsize = (I16 || (config.flags4 & CFG4space && b.BC != BCasm));
5631     }
5632     else
5633     {
5634         bn = null;
5635         usespace = (config.flags4 & CFG4space);
5636         useopsize = (I16 || config.flags4 & CFG4space);
5637     }
5638     for (; c; c = code_next(c))
5639     {
5640     L1:
5641         opcode_t op = c.Iop;
5642         if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
5643             ins = vex_inssize(c);
5644         else if ((op & 0xFFFD00) == 0x0F3800)
5645             ins = inssize2[(op >> 8) & 0xFF];
5646         else if ((op & 0xFF00) == 0x0F00)
5647             ins = inssize2[op & 0xFF];
5648         else
5649             ins = inssize[op & 0xFF];
5650         if (ins & M)            // if modregrm byte
5651         {
5652             int shortop = (c.Iflags & CFopsize) ? !I16 : I16;
5653             int local_BPRM = BPRM;
5654 
5655             if (c.Iflags & CFaddrsize)
5656                 local_BPRM ^= 5 ^ 6;    // toggle between 5 and 6
5657 
5658             uint rm = c.Irm;
5659             reg_t reg = rm & modregrm(0,7,0);          // isolate reg field
5660             reg_t ereg = rm & 7;
5661             //printf("c = %p, op = %02x rm = %02x\n", c, op, rm);
5662 
5663             /* If immediate second operand      */
5664             if ((ins & T ||
5665                  ((op == 0xF6 || op == 0xF7) && (reg < modregrm(0,2,0) || reg > modregrm(0,3,0)))
5666                 ) &&
5667                 c.IFL2 == FLconst)
5668             {
5669                 int flags = c.Iflags & CFpsw;      /* if want result in flags */
5670                 targ_long u = c.IEV2.Vuns;
5671                 if (ins & E)
5672                     u = cast(byte) u;
5673                 else if (shortop)
5674                     u = cast(short) u;
5675 
5676                 // Replace CMP reg,0 with TEST reg,reg
5677                 if ((op & 0xFE) == 0x80 &&              // 80 is CMP R8,imm8; 81 is CMP reg,imm
5678                     rm >= modregrm(3,7,AX) &&
5679                     u == 0)
5680                 {
5681                     c.Iop = (op & 1) | 0x84;
5682                     c.Irm = modregrm(3,ereg,ereg);
5683                     if (c.Irex & REX_B)
5684                         c.Irex |= REX_R;
5685                     goto L1;
5686                 }
5687 
5688                 /* Optimize ANDs with an immediate constant             */
5689                 if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0))
5690                 {
5691                     if (rm >= modregrm(3,4,AX))         // AND reg,imm
5692                     {
5693                         if (u == 0)
5694                         {
5695                             /* Replace with XOR reg,reg     */
5696                             c.Iop = 0x30 | (op & 1);
5697                             c.Irm = modregrm(3,ereg,ereg);
5698                             if (c.Irex & REX_B)
5699                                 c.Irex |= REX_R;
5700                             goto L1;
5701                         }
5702                         if (u == 0xFFFFFFFF && !flags)
5703                         {
5704                             c.Iop = NOP;
5705                             goto L1;
5706                         }
5707                     }
5708                     if (op == 0x81 && !flags)
5709                     {   // If we can do the operation in one byte
5710 
5711                         // If EA is not SI or DI
5712                         if ((rm < modregrm(3,4,SP) || I64) &&
5713                             (config.flags4 & CFG4space ||
5714                              config.target_cpu < TARGET_PentiumPro)
5715                            )
5716                         {
5717                             if ((u & 0xFFFFFF00) == 0xFFFFFF00)
5718                                 goto L2;
5719                             else if (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4))
5720                             {
5721                                 if (!shortop)
5722                                 {
5723                                     if ((u & 0xFFFF00FF) == 0xFFFF00FF)
5724                                         goto L3;
5725                                 }
5726                                 else
5727                                 {
5728                                     if ((u & 0xFF) == 0xFF)
5729                                         goto L3;
5730                                 }
5731                             }
5732                         }
5733                         if (!shortop && useopsize)
5734                         {
5735                             if ((u & 0xFFFF0000) == 0xFFFF0000)
5736                             {
5737                                 c.Iflags ^= CFopsize;
5738                                 goto L1;
5739                             }
5740                             if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX))
5741                             {
5742                                 c.IEV1.Voffset += 2; /* address MSW      */
5743                                 c.IEV2.Vuns >>= 16;
5744                                 c.Iflags ^= CFopsize;
5745                                 goto L1;
5746                             }
5747                             if (rm >= modregrm(3,4,AX))
5748                             {
5749                                 if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64))
5750                                 {
5751                                     c.Iop = MOVZXb;     // MOVZX
5752                                     c.Irm = modregrm(3,ereg,ereg);
5753                                     if (c.Irex & REX_B)
5754                                         c.Irex |= REX_R;
5755                                     goto L1;
5756                                 }
5757                                 if (u == 0xFFFF)
5758                                 {
5759                                     c.Iop = MOVZXw;     // MOVZX
5760                                     c.Irm = modregrm(3,ereg,ereg);
5761                                     if (c.Irex & REX_B)
5762                                         c.Irex |= REX_R;
5763                                     goto L1;
5764                                 }
5765                             }
5766                         }
5767                     }
5768                 }
5769 
5770                 /* Look for ADD,OR,SUB,XOR with u that we can eliminate */
5771                 if (!flags &&
5772                     (op == 0x81 || op == 0x80) &&
5773                     (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) ||  // ADD,OR
5774                      reg == modregrm(0,5,0) || reg == modregrm(0,6,0))    // SUB, XOR
5775                    )
5776                 {
5777                     if (u == 0)
5778                     {
5779                         c.Iop = NOP;
5780                         goto L1;
5781                     }
5782                     if (u == ~0 && reg == modregrm(0,6,0))  /* XOR  */
5783                     {
5784                         c.Iop = 0xF6 | (op & 1);       /* NOT  */
5785                         c.Irm ^= modregrm(0,6^2,0);
5786                         goto L1;
5787                     }
5788                     if (!shortop &&
5789                         useopsize &&
5790                         op == 0x81 &&
5791                         (u & 0xFFFF0000) == 0 &&
5792                         (reg == modregrm(0,6,0) || reg == modregrm(0,1,0)))
5793                     {
5794                         c.Iflags ^= CFopsize;
5795                         goto L1;
5796                     }
5797                 }
5798 
5799                 /* Look for TEST or OR or XOR with an immediate constant */
5800                 /* that we can replace with a byte operation            */
5801                 if (op == 0xF7 && reg == modregrm(0,0,0) ||
5802                     op == 0x81 && reg == modregrm(0,6,0) && !flags ||
5803                     op == 0x81 && reg == modregrm(0,1,0))
5804                 {
5805                     // See if we can replace a dword with a word
5806                     // (avoid for 32 bit instructions, because CFopsize
5807                     //  is too slow)
5808                     if (!shortop && useopsize)
5809                     {
5810                         if ((u & 0xFFFF0000) == 0)
5811                         {
5812                             c.Iflags ^= CFopsize;
5813                             goto L1;
5814                         }
5815                         /* If memory (not register) addressing mode     */
5816                         if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX))
5817                         {
5818                             c.IEV1.Voffset += 2; /* address MSW  */
5819                             c.IEV2.Vuns >>= 16;
5820                             c.Iflags ^= CFopsize;
5821                             goto L1;
5822                         }
5823                     }
5824 
5825                     // If EA is not SI or DI
5826                     if (rm < (modregrm(3,0,SP) | reg) &&
5827                         (usespace ||
5828                          config.target_cpu < TARGET_PentiumPro)
5829                        )
5830                     {
5831                         if ((u & 0xFFFFFF00) == 0)
5832                         {
5833                         L2: c.Iop--;           /* to byte instruction  */
5834                             c.Iflags &= ~CFopsize;
5835                             goto L1;
5836                         }
5837                         if (((u & 0xFFFF00FF) == 0 ||
5838                              (shortop && (u & 0xFF) == 0)) &&
5839                             (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4)))
5840                         {
5841                         L3:
5842                             c.IEV2.Vuns >>= 8;
5843                             if (rm >= (modregrm(3,0,AX) | reg))
5844                                 c.Irm |= 4;    /* AX.AH, BX.BH, etc. */
5845                             else
5846                                 c.IEV1.Voffset += 1;
5847                             goto L2;
5848                         }
5849                     }
5850 
5851                     // BUG: which is right?
5852                     //else if ((u & 0xFFFF0000) == 0)
5853 
5854                     else if (0 && op == 0xF7 &&
5855                              rm >= modregrm(3,0,SP) &&
5856                              (u & 0xFFFF0000) == 0)
5857 
5858                         c.Iflags &= ~CFopsize;
5859                 }
5860 
5861                 // Try to replace TEST reg,-1 with TEST reg,reg
5862                 if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8
5863                 {
5864                     if ((u & 0xFF) == 0xFF)
5865                     {
5866                       L4:
5867                         c.Iop = 0x84;          // TEST regL,regL
5868                         c.Irm = modregrm(3,ereg,ereg);
5869                         if (c.Irex & REX_B)
5870                             c.Irex |= REX_R;
5871                         c.Iflags &= ~CFopsize;
5872                         goto L1;
5873                     }
5874                 }
5875                 if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4))
5876                 {
5877                     if (u == 0xFF)
5878                     {
5879                         if (ereg & 4) // SIL,DIL,BPL,SPL need REX prefix
5880                             c.Irex |= REX;
5881                         goto L4;
5882                     }
5883                     if ((u & 0xFFFF) == 0xFF00 && shortop && !c.Irex && ereg < 4)
5884                     {
5885                         ereg |= 4;                /* to regH      */
5886                         goto L4;
5887                     }
5888                 }
5889 
5890                 /* Look for sign extended immediate data */
5891                 if (cast(byte) u == u)
5892                 {
5893                     if (op == 0x81)
5894                     {
5895                         if (reg != 0x08 && reg != 0x20 && reg != 0x30)
5896                             c.Iop = op = 0x83;         /* 8 bit sgn ext */
5897                     }
5898                     else if (op == 0x69)                /* IMUL rw,ew,dw */
5899                         c.Iop = op = 0x6B;             /* IMUL rw,ew,db */
5900                 }
5901 
5902                 // Look for SHIFT EA,imm8 we can replace with short form
5903                 if (u == 1 && ((op & 0xFE) == 0xC0))
5904                     c.Iop |= 0xD0;
5905 
5906             } /* if immediate second operand */
5907 
5908             /* Look for AX short form */
5909             if (ins & A)
5910             {
5911                 if (rm == modregrm(0,AX,local_BPRM) &&
5912                     !(c.Irex & REX_R) &&               // and it's AX, not R8
5913                     (op & ~3) == 0x88 &&
5914                     !I64)
5915                 {
5916                     op = ((op & 3) + 0xA0) ^ 2;
5917                     /* 8A. A0 */
5918                     /* 8B. A1 */
5919                     /* 88. A2 */
5920                     /* 89. A3 */
5921                     c.Iop = op;
5922                     c.IFL2 = c.IFL1;
5923                     c.IEV2 = c.IEV1;
5924                 }
5925 
5926                 /* Replace MOV REG1,REG2 with MOV EREG1,EREG2   */
5927                 else if (!I16 &&
5928                          (op == 0x89 || op == 0x8B) &&
5929                          (rm & 0xC0) == 0xC0 &&
5930                          (!b || b.BC != BCasm)
5931                         )
5932                     c.Iflags &= ~CFopsize;
5933 
5934                 // If rm is AX
5935                 else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c.Irex & (REX_R | REX_B)))
5936                 {
5937                     switch (op)
5938                     {
5939                         case 0x80:  op = reg | 4; break;
5940                         case 0x81:  op = reg | 5; break;
5941                         case 0x87:  op = 0x90 + (reg>>3); break;    // XCHG
5942 
5943                         case 0xF6:
5944                             if (reg == 0)
5945                                 op = 0xA8;  /* TEST AL,immed8       */
5946                             break;
5947 
5948                         case 0xF7:
5949                             if (reg == 0)
5950                                 op = 0xA9;  /* TEST AX,immed16      */
5951                             break;
5952 
5953                         default:
5954                             break;
5955                     }
5956                     c.Iop = op;
5957                 }
5958             }
5959 
5960             /* Look for reg short form */
5961             if ((ins & R) && (rm & 0xC0) == 0xC0)
5962             {
5963                 switch (op)
5964                 {
5965                     case 0xC6:  op = 0xB0 + ereg; break;
5966                     case 0xC7: // if no sign extension
5967                         if (!(c.Irex & REX_W && c.IEV2.Vint < 0))
5968                         {
5969                             c.Irm = 0;
5970                             c.Irex &= ~REX_W;
5971                             op = 0xB8 + ereg;
5972                         }
5973                         break;
5974 
5975                     case 0xFF:
5976                         switch (reg)
5977                         {   case 6<<3: op = 0x50+ereg; break;/* PUSH*/
5978                             case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/
5979                             case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/
5980                             default: break;
5981                         }
5982                         break;
5983 
5984                     case 0x8F:  op = 0x58 + ereg; break;
5985                     case 0x87:
5986                         if (reg == 0 && !(c.Irex & (REX_R | REX_B))) // Issue 12968: Needed to ensure it's referencing RAX, not R8
5987                             op = 0x90 + ereg;
5988                         break;
5989 
5990                     default:
5991                         break;
5992                 }
5993                 c.Iop = op;
5994             }
5995 
5996             // Look to remove redundant REX prefix on XOR
5997             if (c.Irex == REX_W // ignore ops involving R8..R15
5998                 && (op == 0x31 || op == 0x33) // XOR
5999                 && ((rm & 0xC0) == 0xC0) // register direct
6000                 && ((reg >> 3) == ereg)) // register with itself
6001             {
6002                 c.Irex = 0;
6003             }
6004 
6005             // Look to replace SHL reg,1 with ADD reg,reg
6006             if ((op & ~1) == 0xD0 &&
6007                      (rm & modregrm(3,7,0)) == modregrm(3,4,0) &&
6008                      config.target_cpu >= TARGET_80486)
6009             {
6010                 c.Iop &= 1;
6011                 c.Irm = cast(ubyte)((rm & modregrm(3,0,7)) | (ereg << 3));
6012                 if (c.Irex & REX_B)
6013                     c.Irex |= REX_R;
6014                 if (!(c.Iflags & CFpsw) && !I16)
6015                     c.Iflags &= ~CFopsize;
6016                 goto L1;
6017             }
6018 
6019             /* Look for sign extended modregrm displacement, or 0
6020              * displacement.
6021              */
6022 
6023             if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp
6024                 c.IFL1 == FLconst)      // and it's a constant
6025             {
6026                 a = c.IEV1.Vpointer;
6027                 if (a == 0 && (rm & 7) != local_BPRM &&         // if 0[disp]
6028                     !(local_BPRM == 5 && (rm & 7) == 4 && (c.Isib & 7) == BP)
6029                    )
6030                     c.Irm &= 0x3F;
6031                 else if (!I16)
6032                 {
6033                     if (cast(targ_size_t)cast(targ_schar)a == a)
6034                         c.Irm ^= 0xC0;                 /* do 8 sx      */
6035                 }
6036                 else if ((cast(targ_size_t)cast(targ_schar)a & 0xFFFF) == (a & 0xFFFF))
6037                     c.Irm ^= 0xC0;                     /* do 8 sx      */
6038             }
6039 
6040             /* Look for LEA reg,[ireg], replace with MOV reg,ireg       */
6041             if (op == LEA)
6042             {
6043                 rm = c.Irm & 7;
6044                 mod = c.Irm & modregrm(3,0,0);
6045                 if (mod == 0)
6046                 {
6047                     if (!I16)
6048                     {
6049                         switch (rm)
6050                         {
6051                             case 4:
6052                             case 5:
6053                                 break;
6054 
6055                             default:
6056                                 c.Irm |= modregrm(3,0,0);
6057                                 c.Iop = 0x8B;
6058                                 break;
6059                         }
6060                     }
6061                     else
6062                     {
6063                         switch (rm)
6064                         {
6065                             case 4:     rm = modregrm(3,0,SI);  goto L6;
6066                             case 5:     rm = modregrm(3,0,DI);  goto L6;
6067                             case 7:     rm = modregrm(3,0,BX);  goto L6;
6068                             L6:     c.Irm = cast(ubyte)(rm + reg);
6069                                     c.Iop = 0x8B;
6070                                     break;
6071 
6072                             default:
6073                                     break;
6074                         }
6075                     }
6076                 }
6077 
6078                 /* replace LEA reg,0[BP] with MOV reg,BP        */
6079                 else if (mod == modregrm(1,0,0) && rm == local_BPRM &&
6080                         c.IFL1 == FLconst && c.IEV1.Vpointer == 0)
6081                 {
6082                     c.Iop = 0x8B;          /* MOV reg,BP   */
6083                     c.Irm = cast(ubyte)(modregrm(3,0,BP) + reg);
6084                 }
6085             }
6086 
6087             // Replace [R13] with 0[R13]
6088             if (c.Irex & REX_B && ((c.Irm & modregrm(3,0,7)) == modregrm(0,0,BP) ||
6089                                     issib(c.Irm) && (c.Irm & modregrm(3,0,0)) == 0 && (c.Isib & 7) == BP))
6090             {
6091                 c.Irm |= modregrm(1,0,0);
6092                 c.IFL1 = FLconst;
6093                 c.IEV1.Vpointer = 0;
6094             }
6095         }
6096         else if (!(c.Iflags & CFvex))
6097         {
6098             switch (op)
6099             {
6100                 default:
6101                     // Look for MOV r64, immediate
6102                     if ((c.Irex & REX_W) && (op & ~7) == 0xB8)
6103                     {
6104                         /* Look for zero extended immediate data */
6105                         if (c.IEV2.Vsize_t == c.IEV2.Vuns)
6106                         {
6107                             c.Irex &= ~REX_W;
6108                         }
6109                         /* Look for sign extended immediate data */
6110                         else if (c.IEV2.Vsize_t == c.IEV2.Vint)
6111                         {
6112                             c.Irm = modregrm(3,0,op & 7);
6113                             c.Iop = op = 0xC7;
6114                             c.IEV2.Vsize_t = c.IEV2.Vuns;
6115                         }
6116                     }
6117                     if ((op & ~0x0F) != 0x70)
6118                         break;
6119                     goto case JMP;
6120 
6121                 case JMP:
6122                     switch (c.IFL2)
6123                     {
6124                         case FLcode:
6125                             if (c.IEV2.Vcode == code_next(c))
6126                             {
6127                                 c.Iop = NOP;
6128                                 continue;
6129                             }
6130                             break;
6131 
6132                         case FLblock:
6133                             if (!code_next(c) && c.IEV2.Vblock == bn)
6134                             {
6135                                 c.Iop = NOP;
6136                                 continue;
6137                             }
6138                             break;
6139 
6140                         case FLconst:
6141                         case FLfunc:
6142                         case FLextern:
6143                             break;
6144 
6145                         default:
6146                             WRFL(cast(FL)c.IFL2);
6147                             assert(0);
6148                     }
6149                     break;
6150 
6151                 case 0x68:                      // PUSH immed16
6152                     if (c.IFL2 == FLconst)
6153                     {
6154                         targ_long u = c.IEV2.Vuns;
6155                         if (I64 ||
6156                             ((c.Iflags & CFopsize) ? I16 : I32))
6157                         {   // PUSH 32/64 bit operand
6158                             if (u == cast(byte) u)
6159                                 c.Iop = 0x6A;          // PUSH immed8
6160                         }
6161                         else // PUSH 16 bit operand
6162                         {
6163                             if (cast(short)u == cast(byte) u)
6164                                 c.Iop = 0x6A;          // PUSH immed8
6165                         }
6166                     }
6167                     break;
6168             }
6169         }
6170     }
6171 
6172     debug
6173     if (debugc)
6174     {
6175         printf("-pinholeopt(%p)\n",cstart);
6176         for (c = cstart; c; c = code_next(c))
6177             code_print(c);
6178     }
6179 }
6180 
6181 
6182 debug
6183 {
private void pinholeopt_unittest()
{
    //printf("pinholeopt_unittest()\n");

    /* One half of a test case: the instruction fields handed to
     * pinholeopt(), or the fields expected back from it.
     * `model` is only consulted on the input half (0 means "any model").
     */
    static struct CS
    {
        uint model,op,ea;
        targ_size_t ev1,ev2;
        uint flags;
    }

    // Each row is [ input, expected output ]
    __gshared CS[2][22] tests =
    [
        // XOR reg,immed                            NOT regL
        [ { 16,0x81,modregrm(3,6,BX),0,0xFF,0 },    { 0,0xF6,modregrm(3,2,BX),0,0xFF } ],

        // MOV 0[BX],3                               MOV [BX],3
        [ { 16,0xC7,modregrm(2,0,7),0,3 },          { 0,0xC7,modregrm(0,0,7),0,3 } ],

/+      // only if config.flags4 & CFG4space
        // TEST regL,immed8
        [ { 0,0xF6,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }],
        [ { 0,0xF7,modregrm(3,0,BX),0,0xFF,0 },    { 0,0x84,modregrm(3,BX,BX),0,0xFF }],
        [ { 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }],
        [ { 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 },  { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }],
+/

        // PUSH immed => PUSH immed8
        [ { 0,0x68,0,0,0 },    { 0,0x6A,0,0,0 }],
        [ { 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }],
        [ { 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }],
        [ { 16,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }],
        [ { 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }],
        [ { 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }],
        [ { 16,0x68,0,0,0x10000,0 },     { 0,0x6A,0,0,0x10000,0 }],
        [ { 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }],
        [ { 32,0x68,0,0,0,CFopsize },    { 0,0x6A,0,0,0,CFopsize }],
        [ { 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }],
        [ { 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }],
        [ { 32,0x68,0,0,0x10000,CFopsize },    { 0,0x6A,0,0,0x10000,CFopsize }],
        [ { 32,0x68,0,0,0x8000,CFopsize }, { 0,0x68,0,0,0x8000,CFopsize }],

        // clear r64, for r64 != R8..R15
        [ { 64,0x31,0x800C0,0,0,0 }, { 0,0x31,0xC0,0,0,0}],
        [ { 64,0x33,0x800C0,0,0,0 }, { 0,0x33,0xC0,0,0,0}],

        // MOV r64, immed
        [ { 64,0xC7,0x800C0,0,0xFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,0xFFFFFFFF,0}],
        [ { 64,0xC7,0x800C0,0,0x7FFFFFFF,0 }, { 0,0xB8,0,0,0x7FFFFFFF,0}],
        [ { 64,0xB8,0x80000,0,0xFFFFFFFF,0 }, { 0,0xB8,0,0,0xFFFFFFFF,0 }],
        [ { 64,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }, { 0,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }],
        [ { 64,0xB8,0x80000,0,cast(targ_size_t)0xFFFFFFFFFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,cast(targ_size_t)0xFFFFFFFF,0}],
    ];

    //config.flags4 |= CFG4space;
    for (int i = 0; i < tests.length; i++)
    {
        CS* given = &tests[i][0];
        CS* want  = &tests[i][1];

        // A non-zero model restricts the case to that memory model;
        // skip it when compiling for any other one.
        const uint wantedModel = given.model;
        if (wantedModel &&
            ((I16 && wantedModel != 16) ||
             (I32 && wantedModel != 32) ||
             (I64 && wantedModel != 64)))
            continue;

        //printf("[%d]\n", i);

        // Build a zeroed instruction carrying just the fields under test
        code cs = void;
        memset(&cs, 0, cs.sizeof);
        cs.Iop = given.op;
        cs.Iea = given.ea;
        cs.IFL1 = FLconst;
        cs.IFL2 = FLconst;
        cs.IEV1.Vsize_t = given.ev1;
        cs.IEV2.Vsize_t = given.ev2;
        cs.Iflags = given.flags;

        pinholeopt(&cs, null);

        // Report the opcode mismatch before dying, it is the most common failure
        if (cs.Iop != want.op)
        {
            printf("[%d] Iop = x%02x, pout = x%02x\n", i, cs.Iop, want.op);
            assert(0);
        }
        assert(cs.Iea == want.ea);
        assert(cs.IEV1.Vsize_t == want.ev1);
        assert(cs.IEV2.Vsize_t == want.ev2);
        assert(cs.Iflags == want.flags);
    }
}
6270 }
6271 
/**************************
 * Try to turn an `op r/m,immediate` instruction (opcode 0x80 or 0x81)
 * into the `op r/m,reg` form when some register is already known to
 * hold the immediate value.
 * Only attempted when CFG4optimized is set. The instruction is
 * rewritten in place; if any precondition fails it is left untouched.
 * Params:
 *      c = instruction to examine and possibly rewrite
 */
void simplify_code(code* c)
{
    if (!(config.flags4 & CFG4optimized))
        return;

    // 0x80 is the byte form, 0x81 the word/dword form
    const bool byteForm = c.Iop == 0x80;
    if (!byteForm && c.Iop != 0x81)
        return;

    // The second operand must be a plain constant
    if (c.IFL2 != FLconst)
        return;

    // Is there a suitable register that already contains that constant?
    reg_t reg;
    if (!reghasvalue(byteForm ? BYTEREGS : ALLREGS,I64 ? c.IEV2.Vsize_t : c.IEV2.Vlong,&reg))
        return;

    // Not done for 16 bit code carrying an operand size prefix
    if (I16 && (c.Iflags & CFopsize))
        return;

    // Base opcode of the register form, indexed by the reg field of the modregrm byte
    static immutable ubyte[8] regop =
            [ 0x00,0x08,0x10,0x18,0x20,0x28,0x30,0x38 ];

    //printf("replacing 0x%02x, val = x%lx\n",c.Iop,c.IEV2.Vlong);
    const uint opIndex = (c.Irm & modregrm(0,7,0)) >> 3;
    c.Iop = regop[opIndex] | (c.Iop & 1);       // keep the byte/word bit of the original opcode
    code_newreg(c, reg);

    // Byte operation on register number 4..7 in 64 bit code: request a REX prefix
    if (I64 && !(c.Iop & 1) && (reg & 4))
        c.Irex |= REX;
}
6293 
6294 /**************************
6295  * Compute jump addresses for FLcode.
6296  * Note: only works for forward referenced code.
6297  *       only direct jumps and branches are detected.
6298  *       LOOP instructions only work for backward refs.
6299  */
6300 
6301 void jmpaddr(code *c)
6302 {
6303     code* ci,cn,ctarg,cstart;
6304     targ_size_t ad;
6305 
6306     //printf("jmpaddr()\n");
6307     cstart = c;                           /* remember start of code       */
6308     while (c)
6309     {
6310         const op = c.Iop;
6311         if (op <= 0xEB &&
6312             inssize[op] & T &&   // if second operand
6313             c.IFL2 == FLcode &&
6314             ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ || op == CALL))
6315         {
6316             ci = code_next(c);
6317             ctarg = c.IEV2.Vcode;  /* target code                  */
6318             ad = 0;                 /* IP displacement              */
6319             while (ci && ci != ctarg)
6320             {
6321                 ad += calccodsize(ci);
6322                 ci = code_next(ci);
6323             }
6324             if (!ci)
6325                 goto Lbackjmp;      // couldn't find it
6326             if (!I16 || op == JMP || op == JMPS || op == JCXZ || op == CALL)
6327                 c.IEV2.Vpointer = ad;
6328             else                    /* else conditional             */
6329             {
6330                 if (!(c.Iflags & CFjmp16))     /* if branch    */
6331                     c.IEV2.Vpointer = ad;
6332                 else            /* branch around a long jump    */
6333                 {
6334                     cn = code_next(c);
6335                     c.next = code_calloc();
6336                     code_next(c).next = cn;
6337                     c.Iop = op ^ 1;        /* converse jmp */
6338                     c.Iflags &= ~CFjmp16;
6339                     c.IEV2.Vpointer = I16 ? 3 : 5;
6340                     cn = code_next(c);
6341                     cn.Iop = JMP;          /* long jump    */
6342                     cn.IFL2 = FLconst;
6343                     cn.IEV2.Vpointer = ad;
6344                 }
6345             }
6346             c.IFL2 = FLconst;
6347         }
6348         if (op == LOOP && c.IFL2 == FLcode)    /* backwards refs       */
6349         {
6350           Lbackjmp:
6351             ctarg = c.IEV2.Vcode;
6352             for (ci = cstart; ci != ctarg; ci = code_next(ci))
6353                 if (!ci || ci == c)
6354                     assert(0);
6355             ad = 2;                 /* - IP displacement            */
6356             while (ci != c)
6357             {
6358                 assert(ci);
6359                 ad += calccodsize(ci);
6360                 ci = code_next(ci);
6361             }
6362             c.IEV2.Vpointer = (-ad) & 0xFF;
6363             c.IFL2 = FLconst;
6364         }
6365         c = code_next(c);
6366     }
6367 }
6368 
6369 /*******************************
6370  * Calculate bl.Bsize.
6371  */
6372 
6373 uint calcblksize(code *c)
6374 {
6375     uint size;
6376     for (size = 0; c; c = code_next(c))
6377     {
6378         uint sz = calccodsize(c);
6379         //printf("off=%02x, sz = %d, code %p: op=%02x\n", size, sz, c, c.Iop);
6380         size += sz;
6381     }
6382     //printf("calcblksize(c = x%x) = %d\n", c, size);
6383     return size;
6384 }
6385 
6386 /*****************************
6387  * Calculate and return code size of a code.
6388  * Note that NOPs are sometimes used as markers, but are
6389  * never output. LINNUMs are never output.
6390  * Note: This routine must be fast. Profiling shows it is significant.
6391  */
6392 
6393 uint calccodsize(code *c)
6394 {
6395     uint size;
6396     ubyte rm,mod,ins;
6397     uint iflags;
6398     uint i32 = I32 || I64;
6399     uint a32 = i32;
6400 
6401     debug
6402     assert((a32 & ~1) == 0);
6403 
6404     iflags = c.Iflags;
6405     opcode_t op = c.Iop;
6406     //printf("calccodsize(x%08x), Iflags = x%x\n", op, iflags);
6407     if (iflags & CFvex && c.Ivex.pfx == 0xC4)
6408     {
6409         ins = vex_inssize(c);
6410         size = ins & 7;
6411         goto Lmodrm;
6412     }
6413     else if ((op & 0xFF00) == 0x0F00 || (op & 0xFFFD00) == 0x0F3800)
6414         op = 0x0F;
6415     else
6416         op &= 0xFF;
6417     switch (op)
6418     {
6419         case 0x0F:
6420             if ((c.Iop & 0xFFFD00) == 0x0F3800)
6421             {   // 3 byte op ( 0F38-- or 0F3A-- )
6422                 ins = inssize2[(c.Iop >> 8) & 0xFF];
6423                 size = ins & 7;
6424                 if (c.Iop & 0xFF000000)
6425                   size++;
6426             }
6427             else
6428             {   // 2 byte op ( 0F-- )
6429                 ins = inssize2[c.Iop & 0xFF];
6430                 size = ins & 7;
6431                 if (c.Iop & 0xFF0000)
6432                   size++;
6433             }
6434             break;
6435 
6436         case 0x90:
6437             size = (c.Iop == PAUSE) ? 2 : 1;
6438             goto Lret2;
6439 
6440         case NOP:
6441         case ESCAPE:
6442             size = 0;                   // since these won't be output
6443             goto Lret2;
6444 
6445         case ASM:
6446             if (c.Iflags == CFaddrsize)        // kludge for DA inline asm
6447                 size = _tysize[TYnptr];
6448             else
6449                 size = cast(uint)c.IEV1.len;
6450             goto Lret2;
6451 
6452         case 0xA1:
6453         case 0xA3:
6454             if (c.Irex)
6455             {
6456                 size = 9;               // 64 bit immediate value for MOV to/from RAX
6457                 goto Lret;
6458             }
6459             goto Ldefault;
6460 
6461         case 0xF6:                      /* TEST mem8,immed8             */
6462             ins = inssize[op];
6463             size = ins & 7;
6464             if (i32)
6465                 size = inssize32[op];
6466             if ((c.Irm & (7<<3)) == 0)
6467                 size++;                 /* size of immed8               */
6468             break;
6469 
6470         case 0xF7:
6471             ins = inssize[op];
6472             size = ins & 7;
6473             if (i32)
6474                 size = inssize32[op];
6475             if ((c.Irm & (7<<3)) == 0)
6476                 size += (i32 ^ ((iflags & CFopsize) !=0)) ? 4 : 2;
6477             break;
6478 
6479         default:
6480         Ldefault:
6481             ins = inssize[op];
6482             size = ins & 7;
6483             if (i32)
6484                 size = inssize32[op];
6485     }
6486 
6487     if (iflags & (CFwait | CFopsize | CFaddrsize | CFSEG))
6488     {
6489         if (iflags & CFwait)    // if add FWAIT prefix
6490             size++;
6491         if (iflags & CFSEG)     // if segment override
6492             size++;
6493 
6494         // If the instruction has a second operand that is not an 8 bit,
6495         // and the operand size prefix is present, then fix the size computation
6496         // because the operand size will be different.
6497         // Walter, I had problems with this bit at the end.  There can still be
6498         // an ADDRSIZE prefix for these and it does indeed change the operand size.
6499 
6500         if (iflags & (CFopsize | CFaddrsize))
6501         {
6502             if ((ins & (T|E)) == T)
6503             {
6504                 if ((op & 0xAC) == 0xA0)
6505                 {
6506                     if (iflags & CFaddrsize && !I64)
6507                     {   if (I32)
6508                             size -= 2;
6509                         else
6510                             size += 2;
6511                     }
6512                 }
6513                 else if (iflags & CFopsize)
6514                 {   if (I16)
6515                         size += 2;
6516                     else
6517                         size -= 2;
6518                 }
6519             }
6520             if (iflags & CFaddrsize)
6521             {   if (!I64)
6522                     a32 ^= 1;
6523                 size++;
6524             }
6525             if (iflags & CFopsize)
6526                 size++;                         /* +1 for OPSIZE prefix         */
6527         }
6528     }
6529 
6530 Lmodrm:
6531     if ((op & ~0x0F) == 0x70)
6532     {
6533         if (iflags & CFjmp16)           // if long branch
6534             size += I16 ? 3 : 4;        // + 3(4) bytes for JMP
6535     }
6536     else if (ins & M)                   // if modregrm byte
6537     {
6538         rm = c.Irm;
6539         mod = rm & 0xC0;
6540         if (a32 || I64)
6541         {   // 32 bit addressing
6542             if (issib(rm))
6543                 size++;
6544             switch (mod)
6545             {   case 0:
6546                     if (issib(rm) && (c.Isib & 7) == 5 ||
6547                         (rm & 7) == 5)
6548                         size += 4;      /* disp32                       */
6549                     if (c.Irex & REX_B && (rm & 7) == 5)
6550                         /* Instead of selecting R13, this mode is an [RIP] relative
6551                          * address. Although valid, it's redundant, and should not
6552                          * be generated. Instead, generate 0[R13] instead of [R13].
6553                          */
6554                         assert(0);
6555                     break;
6556 
6557                 case 0x40:
6558                     size++;             /* disp8                        */
6559                     break;
6560 
6561                 case 0x80:
6562                     size += 4;          /* disp32                       */
6563                     break;
6564 
6565                 default:
6566                     break;
6567             }
6568         }
6569         else
6570         {   // 16 bit addressing
6571             if (mod == 0x40)            /* 01: 8 bit displacement       */
6572                 size++;
6573             else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6))
6574                 size += 2;
6575         }
6576     }
6577 
6578 Lret:
6579     if (!(iflags & CFvex) && c.Irex)
6580     {
6581         size++;
6582         if (c.Irex & REX_W && (op & ~7) == 0xB8)
6583             size += 4;
6584     }
6585 Lret2:
6586     //printf("op = x%02x, size = %d\n",op,size);
6587     return size;
6588 }
6589 
6590 /********************************
6591  * Return !=0 if codes match.
6592  */
6593 
6594 static if (0)
6595 {
6596 
/********************************
 * Compare two instructions for equivalence.
 *
 * Two instructions match when they have the same opcode, the same flags,
 * the same modregrm (and, where one is present, sib) byte, and the same
 * operands. Operands are compared by FL kind, by symbol when
 * flinsymtab[] says the FL kind lives in the symbol table, and by offset.
 * ESCctor/ESCdtor escapes never match; NOPs and all other ESCAPEs always
 * match once the opcodes are equal; ASM blocks match when their byte
 * strings are identical.
 * Params:
 *      c1 = first instruction
 *      c2 = second instruction
 * Returns:
 *      1 if the instructions match, 0 if not
 */
int code_match(code *c1,code *c2)
{
    code cs1,cs2;
    ubyte ins;

    if (c1 == c2)
        goto match;
    cs1 = *c1;
    cs2 = *c2;
    if (cs1.Iop != cs2.Iop)
        goto nomatch;
    switch (cs1.Iop)
    {
        case ESCAPE | ESCctor:
        case ESCAPE | ESCdtor:
            goto nomatch;

        case NOP:
            goto match;

        case ASM:
            // Inline asm: same length and same bytes
            if (cs1.IEV1.len == cs2.IEV1.len &&
                memcmp(cs1.IEV1.bytes,cs2.IEV1.bytes,cs1.IEV1.len) == 0)
                goto match;
            else
                goto nomatch;

        default:
            if ((cs1.Iop & 0xFF) == ESCAPE)
                goto match;
            break;
    }
    if (cs1.Iflags != cs2.Iflags)
        goto nomatch;

    // Pick the instruction description from the table matching the opcode length
    ins = inssize[cs1.Iop & 0xFF];
    if ((cs1.Iop & 0xFFFD00) == 0x0F3800)
    {
        ins = inssize2[(cs1.Iop >> 8) & 0xFF];
    }
    else if ((cs1.Iop & 0xFF00) == 0x0F00)
    {
        ins = inssize2[cs1.Iop & 0xFF];
    }

    if (ins & M)                // if modregrm byte
    {
        if (cs1.Irm != cs2.Irm)
            goto nomatch;
        if ((cs1.Irm & 0xC0) == 0xC0)
            goto do2;           // register direct, there is no memory operand to compare
        if (is32bitaddr(I32,cs1.Iflags))
        {
            if (issib(cs1.Irm) && cs1.Isib != cs2.Isib)
                goto nomatch;
            // mod == 0 has no displacement, except for the disp32 forms
            // (rm == 5, or a sib byte whose base field is 5)
            if (
                ((cs1.Irm & 0xC0) == 0 && !((cs1.Irm & 7) == 4 && (cs1.Isib & 7) == 5 || (cs1.Irm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            // 16 bit addressing: mod == 0 has no displacement, except for rm == 6
            if (
                ((cs1.Irm & 0xC0) == 0 && !((cs1.Irm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }
        if (cs1.IFL1 != cs2.IFL1)
            goto nomatch;
        if (flinsymtab[cs1.IFL1] && cs1.IEV1.Vsym != cs2.IEV1.Vsym)
            goto nomatch;
        if (cs1.IEV1.Voffset != cs2.IEV1.Voffset)
            goto nomatch;
    }

do2:
    if (!(ins & T))                     // if no second operand
        goto match;
    if (cs1.IFL2 != cs2.IFL2)
        goto nomatch;
    if (flinsymtab[cs1.IFL2] && cs1.IEV2.Vsym != cs2.IEV2.Vsym)
        goto nomatch;
    if (cs1.IEV2.Voffset != cs2.IEV2.Voffset)
        goto nomatch;

match:
    return 1;

nomatch:
    return 0;
}
6688 
6689 }
6690 
6691 /**************************
6692  * Write code to intermediate file.
6693  * Code starts at offset.
6694  * Returns:
6695  *      addr of end of code
6696  */
6697 
// Small staging buffer for bytes headed to an object file segment.
// Bytes are appended with gen()/genp() and written out through
// objmod.bytes() by flush()/flushx(). Neither gen() nor genp() flushes on
// its own: the caller must make sure there is room (see available()).
private struct MiniCodeBuf
{
nothrow:
    size_t index;       // number of bytes currently staged in `bytes`
    size_t offset;      // segment offset at which the staged bytes will be written
    int seg;            // segment index handed to objmod.bytes()
    char[100] bytes; // = void;

    // Start with an empty buffer positioned at the current Offset(seg)
    this(int seg)
    {
        index = 0;
        this.offset = cast(size_t)Offset(seg);
        this.seg = seg;
    }

    // Unconditionally write the staged bytes and advance `offset` by
    // whatever objmod.bytes() returns
    void flushx()
    {
        // Emit accumulated bytes to code segment.
        // A completely full buffer (index == bytes.length) is a legal state.
        debug assert(index <= bytes.length);
        offset += objmod.bytes(seg, offset, cast(uint)index, bytes.ptr);
        index = 0;
    }

    // Append a single byte
    void gen(char c)
    {
        debug assert(index < bytes.length);
        bytes[index++] = c;
    }

    // Append n bytes starting at p
    void genp(size_t n, void *p)
    {
        // memcpy is not bounds checked, so verify the capacity in debug builds
        debug assert(index <= bytes.length && n <= bytes.length - index);
        memcpy(&bytes[index], p, n);
        index += n;
    }

    // Write the staged bytes, if there are any
    void flush() { if (index) flushx(); }

    // Segment offset of the next byte to be appended
    uint getOffset() { return cast(uint)(offset + index); }

    // Number of bytes that can still be appended before the buffer is full
    uint available() { return cast(uint)(bytes.sizeof - index); }
}
6731 
// Forward declarations of the operand emitters; their bodies are not in this
// part of the file. Each takes the staging buffer, the operand's FL kind and
// a pointer to its evc value.
// NOTE(review): judging by the names they emit an 8/16/32/64 bit operand
// respectively, and the trailing int parameters look like flags -- confirm
// against the definitions.
private void do8bit(MiniCodeBuf *pbuf, FL, evc *);
private void do16bit(MiniCodeBuf *pbuf, FL, evc *,int);
private void do32bit(MiniCodeBuf *pbuf, FL, evc *,int,int = 0);
private void do64bit(MiniCodeBuf *pbuf, FL, evc *,int);
6736 
6737 uint codout(int seg, code *c)
6738 {
6739     ubyte rm,mod;
6740     ubyte ins;
6741     code *cn;
6742     uint flags;
6743     Symbol *s;
6744 
6745     debug
6746     if (debugc) printf("codout(%p), Coffset = x%llx\n",c,cast(ulong)Offset(seg));
6747 
6748     MiniCodeBuf ggen = void;
6749     ggen.index = 0;
6750     ggen.offset = cast(size_t)Offset(seg);
6751     ggen.seg = seg;
6752 
6753     for (; c; c = code_next(c))
6754     {
6755         debug
6756         {
6757         if (debugc) { printf("off=%02x, sz=%d, ",cast(int)ggen.getOffset(),cast(int)calccodsize(c)); code_print(c); }
6758         uint startoffset = ggen.getOffset();
6759         }
6760 
6761         opcode_t op = c.Iop;
6762         ins = inssize[op & 0xFF];
6763         switch (op & 0xFF)
6764         {
6765             case ESCAPE:
6766                 /* Check for SSE4 opcode v/pmaxuw xmm1,xmm2/m128 */
6767                 if(op == 0x660F383E || c.Iflags & CFvex) break;
6768 
6769                 switch (op & 0xFFFF00)
6770                 {   case ESClinnum:
6771                         /* put out line number stuff    */
6772                         objmod.linnum(c.IEV1.Vsrcpos,seg,ggen.getOffset());
6773                         break;
6774 version (SCPP)
6775 {
6776 static if (1)
6777 {
6778                     case ESCctor:
6779                     case ESCdtor:
6780                     case ESCoffset:
6781                         if (config.exe != EX_WIN32)
6782                             except_pair_setoffset(c,ggen.getOffset() - funcoffset);
6783                         break;
6784 
6785                     case ESCmark:
6786                     case ESCrelease:
6787                     case ESCmark2:
6788                     case ESCrelease2:
6789                         break;
6790 }
6791 else
6792 {
6793                     case ESCctor:
6794                         except_push(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null);
6795                         break;
6796 
6797                     case ESCdtor:
6798                         except_pop(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null);
6799                         break;
6800 
6801                     case ESCmark:
6802                         except_mark();
6803                         break;
6804 
6805                     case ESCrelease:
6806                         except_release();
6807                         break;
6808 }
6809 }
6810                     case ESCadjesp:
6811                         //printf("adjust ESP %ld\n", (long)c.IEV1.Vint);
6812                         break;
6813 
6814                     default:
6815                         break;
6816                 }
6817 
6818                 debug
6819                 assert(calccodsize(c) == 0);
6820 
6821                 continue;
6822 
6823             case NOP:                   /* don't send them out          */
6824                 if (op != NOP)
6825                     break;
6826                 debug
6827                 assert(calccodsize(c) == 0);
6828 
6829                 continue;
6830 
6831             case ASM:
6832                 if (op != ASM)
6833                     break;
6834                 ggen.flush();
6835                 if (c.Iflags == CFaddrsize)    // kludge for DA inline asm
6836                 {
6837                     do32bit(&ggen, FLblockoff,&c.IEV1,0);
6838                 }
6839                 else
6840                 {
6841                     ggen.offset += objmod.bytes(seg,ggen.offset,cast(uint)c.IEV1.len,c.IEV1.bytes);
6842                 }
6843                 debug
6844                 assert(calccodsize(c) == c.IEV1.len);
6845 
6846                 continue;
6847 
6848             default:
6849                 break;
6850         }
6851         flags = c.Iflags;
6852 
6853         // See if we need to flush (don't have room for largest code sequence)
6854         if (ggen.available() < (1+4+4+8+8))
6855             ggen.flush();
6856 
6857         // see if we need to put out prefix bytes
6858         if (flags & (CFwait | CFPREFIX | CFjmp16))
6859         {
6860             int override_;
6861 
6862             if (flags & CFwait)
6863                 ggen.gen(0x9B);                      // FWAIT
6864                                                 /* ? SEGES : SEGSS      */
6865             switch (flags & CFSEG)
6866             {   case CFes:      override_ = SEGES;       goto segover;
6867                 case CFss:      override_ = SEGSS;       goto segover;
6868                 case CFcs:      override_ = SEGCS;       goto segover;
6869                 case CFds:      override_ = SEGDS;       goto segover;
6870                 case CFfs:      override_ = SEGFS;       goto segover;
6871                 case CFgs:      override_ = SEGGS;       goto segover;
6872                 segover:        ggen.gen(cast(ubyte)override_);
6873                                 break;
6874 
6875                 default:        break;
6876             }
6877 
6878             if (flags & CFaddrsize)
6879                 ggen.gen(0x67);
6880 
6881             // Do this last because of instructions like ADDPD
6882             if (flags & CFopsize)
6883                 ggen.gen(0x66);                      /* operand size         */
6884 
6885             if ((op & ~0x0F) == 0x70 && flags & CFjmp16) /* long condit jmp */
6886             {
6887                 if (!I16)
6888                 {   // Put out 16 bit conditional jump
6889                     c.Iop = op = 0x0F00 | (0x80 | (op & 0x0F));
6890                 }
6891                 else
6892                 {
6893                     cn = code_calloc();
6894                     /*cxcalloc++;*/
6895                     cn.next = code_next(c);
6896                     c.next= cn;          // link into code
6897                     cn.Iop = JMP;              // JMP block
6898                     cn.IFL2 = c.IFL2;
6899                     cn.IEV2.Vblock = c.IEV2.Vblock;
6900                     c.Iop = op ^= 1;           // toggle condition
6901                     c.IFL2 = FLconst;
6902                     c.IEV2.Vpointer = I16 ? 3 : 5; // skip over JMP block
6903                     c.Iflags &= ~CFjmp16;
6904                 }
6905             }
6906         }
6907 
6908         if (flags & CFvex)
6909         {
6910             if (flags & CFvex3)
6911             {
6912                 ggen.gen(0xC4);
6913                 ggen.gen(cast(ubyte)VEX3_B1(c.Ivex));
6914                 ggen.gen(cast(ubyte)VEX3_B2(c.Ivex));
6915                 ggen.gen(c.Ivex.op);
6916             }
6917             else
6918             {
6919                 ggen.gen(0xC5);
6920                 ggen.gen(cast(ubyte)VEX2_B1(c.Ivex));
6921                 ggen.gen(c.Ivex.op);
6922             }
6923             ins = vex_inssize(c);
6924             goto Lmodrm;
6925         }
6926 
6927         if (op > 0xFF)
6928         {
6929             if ((op & 0xFFFD00) == 0x0F3800)
6930                 ins = inssize2[(op >> 8) & 0xFF];
6931             else if ((op & 0xFF00) == 0x0F00)
6932                 ins = inssize2[op & 0xFF];
6933 
6934             if (op & 0xFF000000)
6935             {
6936                 ubyte op1 = op >> 24;
6937                 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
6938                 {
6939                     ggen.gen(op1);
6940                     if (c.Irex)
6941                         ggen.gen(c.Irex | REX);
6942                 }
6943                 else
6944                 {
6945                     if (c.Irex)
6946                         ggen.gen(c.Irex | REX);
6947                     ggen.gen(op1);
6948                 }
6949                 ggen.gen((op >> 16) & 0xFF);
6950                 ggen.gen((op >> 8) & 0xFF);
6951                 ggen.gen(op & 0xFF);
6952             }
6953             else if (op & 0xFF0000)
6954             {
6955                 ubyte op1 = cast(ubyte)(op >> 16);
6956                 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
6957                 {
6958                     ggen.gen(op1);
6959                     if (c.Irex)
6960                         ggen.gen(c.Irex | REX);
6961                 }
6962                 else
6963                 {
6964                     if (c.Irex)
6965                         ggen.gen(c.Irex | REX);
6966                     ggen.gen(op1);
6967                 }
6968                 ggen.gen((op >> 8) & 0xFF);
6969                 ggen.gen(op & 0xFF);
6970             }
6971             else
6972             {
6973                 if (c.Irex)
6974                     ggen.gen(c.Irex | REX);
6975                 ggen.gen((op >> 8) & 0xFF);
6976                 ggen.gen(op & 0xFF);
6977             }
6978         }
6979         else
6980         {
6981             if (c.Irex)
6982                 ggen.gen(c.Irex | REX);
6983             ggen.gen(cast(ubyte)op);
6984         }
6985   Lmodrm:
6986         if (ins & M)            /* if modregrm byte             */
6987         {
6988             rm = c.Irm;
6989             ggen.gen(rm);
6990 
6991             // Look for an address size override when working with the
6992             // MOD R/M and SIB bytes
6993 
6994             if (is32bitaddr( I32, flags))
6995             {
6996                 if (issib(rm))
6997                     ggen.gen(c.Isib);
6998                 switch (rm & 0xC0)
6999                 {
7000                     case 0x40:
7001                         do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1);     // 8 bit
7002                         break;
7003 
7004                     case 0:
7005                         if (!(issib(rm) && (c.Isib & 7) == 5 ||
7006                               (rm & 7) == 5))
7007                             break;
7008                         goto case 0x80;
7009 
7010                     case 0x80:
7011                     {
7012                         int cfflags = CFoff;
7013                         targ_size_t val = 0;
7014                         if (I64)
7015                         {
7016                             if ((rm & modregrm(3,0,7)) == modregrm(0,0,5))      // if disp32[RIP]
7017                             {
7018                                 cfflags |= CFpc32;
7019                                 val = -4;
7020                                 reg_t reg = rm & modregrm(0,7,0);
7021                                 if (ins & T ||
7022                                     ((op == 0xF6 || op == 0xF7) && (reg == modregrm(0,0,0) || reg == modregrm(0,1,0))))
7023                                 {   if (ins & E || op == 0xF6)
7024                                         val = -5;
7025                                     else if (c.Iflags & CFopsize)
7026                                         val = -6;
7027                                     else
7028                                         val = -8;
7029                                 }
7030 
7031                                 if (config.exe & (EX_OSX64 | EX_WIN64))
7032                                     /* Mach-O and Win64 fixups already take the 4 byte size
7033                                      * into account, so bias by 4
7034                                      */
7035                                     val += 4;
7036                             }
7037                         }
7038                         do32bit(&ggen, cast(FL)c.IFL1,&c.IEV1,cfflags,cast(int)val);
7039                         break;
7040                     }
7041 
7042                     default:
7043                         break;
7044                 }
7045             }
7046             else
7047             {
7048                 switch (rm & 0xC0)
7049                 {   case 0x40:
7050                         do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1);     // 8 bit
7051                         break;
7052 
7053                     case 0:
7054                         if ((rm & 7) != 6)
7055                             break;
7056                         goto case 0x80;
7057 
7058                     case 0x80:
7059                         do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,CFoff);
7060                         break;
7061 
7062                     default:
7063                         break;
7064                 }
7065             }
7066         }
7067         else
7068         {
7069             if (op == ENTER)
7070                 do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,0);
7071         }
7072         flags &= CFseg | CFoff | CFselfrel;
7073         if (ins & T)                    /* if second operand            */
7074         {
7075             if (ins & E)            /* if data-8                    */
7076                 do8bit(&ggen, cast(FL) c.IFL2,&c.IEV2);
7077             else if (!I16)
7078             {
7079                 switch (op)
7080                 {
7081                     case 0xC2:              /* RETN imm16           */
7082                     case 0xCA:              /* RETF imm16           */
7083                     do16:
7084                         do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
7085                         break;
7086 
7087                     case 0xA1:
7088                     case 0xA3:
7089                         if (I64 && c.Irex)
7090                         {
7091                     do64:
7092                             do64bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
7093                             break;
7094                         }
7095                         goto case 0xA0;
7096 
7097                     case 0xA0:              /* MOV AL,byte ptr []   */
7098                     case 0xA2:
7099                         if (c.Iflags & CFaddrsize && !I64)
7100                             goto do16;
7101                         else
7102                     do32:
7103                             do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
7104                         break;
7105 
7106                     case 0x9A:
7107                     case 0xEA:
7108                         if (c.Iflags & CFopsize)
7109                             goto ptr1616;
7110                         else
7111                             goto ptr1632;
7112 
7113                     case 0x68:              // PUSH immed32
7114                         if (cast(FL)c.IFL2 == FLblock)
7115                         {
7116                             c.IFL2 = FLblockoff;
7117                             goto do32;
7118                         }
7119                         else
7120                             goto case_default;
7121 
7122                     case CALL:              // CALL rel
7123                     case JMP:               // JMP  rel
7124                         flags |= CFselfrel;
7125                         goto case_default;
7126 
7127                     default:
7128                         if ((op|0xF) == 0x0F8F) // Jcc rel16 rel32
7129                             flags |= CFselfrel;
7130                         if (I64 && (op & ~7) == 0xB8 && c.Irex & REX_W)
7131                             goto do64;
7132                     case_default:
7133                         if (c.Iflags & CFopsize)
7134                             goto do16;
7135                         else
7136                             goto do32;
7137                 }
7138             }
7139             else
7140             {
7141                 switch (op)
7142                 {
7143                     case 0xC2:
7144                     case 0xCA:
7145                         goto do16;
7146 
7147                     case 0xA0:
7148                     case 0xA1:
7149                     case 0xA2:
7150                     case 0xA3:
7151                         if (c.Iflags & CFaddrsize)
7152                             goto do32;
7153                         else
7154                             goto do16;
7155 
7156                     case 0x9A:
7157                     case 0xEA:
7158                         if (c.Iflags & CFopsize)
7159                             goto ptr1632;
7160                         else
7161                             goto ptr1616;
7162 
7163                     ptr1616:
7164                     ptr1632:
7165                         //assert(c.IFL2 == FLfunc);
7166                         ggen.flush();
7167                         if (c.IFL2 == FLdatseg)
7168                         {
7169                             objmod.reftodatseg(seg,ggen.offset,c.IEV2.Vpointer,
7170                                     c.IEV2.Vseg,flags);
7171                             ggen.offset += 4;
7172                         }
7173                         else
7174                         {
7175                             s = c.IEV2.Vsym;
7176                             ggen.offset += objmod.reftoident(seg,ggen.offset,s,0,flags);
7177                         }
7178                         break;
7179 
7180                     case 0x68:              // PUSH immed16
7181                         if (cast(FL)c.IFL2 == FLblock)
7182                         {   c.IFL2 = FLblockoff;
7183                             goto do16;
7184                         }
7185                         else
7186                             goto case_default16;
7187 
7188                     case CALL:
7189                     case JMP:
7190                         flags |= CFselfrel;
7191                         goto default;
7192 
7193                     default:
7194                     case_default16:
7195                         if (c.Iflags & CFopsize)
7196                             goto do32;
7197                         else
7198                             goto do16;
7199                 }
7200             }
7201         }
7202         else if (op == 0xF6)            /* TEST mem8,immed8             */
7203         {
7204             if ((rm & (7<<3)) == 0)
7205                 do8bit(&ggen, cast(FL)c.IFL2,&c.IEV2);
7206         }
7207         else if (op == 0xF7)
7208         {
7209             if ((rm & (7<<3)) == 0)     /* TEST mem16/32,immed16/32     */
7210             {
7211                 if ((I32 || I64) ^ ((c.Iflags & CFopsize) != 0))
7212                     do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
7213                 else
7214                     do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
7215             }
7216         }
7217 
7218         debug
7219         if (ggen.getOffset() - startoffset != calccodsize(c))
7220         {
7221             printf("actual: %d, calc: %d\n", cast(int)(ggen.getOffset() - startoffset), cast(int)calccodsize(c));
7222             code_print(c);
7223             assert(0);
7224         }
7225     }
7226     ggen.flush();
7227     Offset(seg) = ggen.offset;
7228     //printf("-codout(), Coffset = x%x\n", Offset(seg));
7229     return cast(uint)ggen.offset;                      /* ending address               */
7230 }
7231 
7232 
/**
 * Emit the 8-byte operand of an instruction.
 *
 * Literal values (constants, block displacements, and the zero placeholders
 * written for FLframehandler and for FLgot on OSX) go straight into the code
 * buffer through `genp`. Every other operand kind flushes the buffer, hands
 * the reference to `objmod` at the current offset, and then advances
 * `pbuf.offset` by 8.
 *
 * Only valid when generating 64 bit code (asserted).
 *
 * Params:
 *      pbuf  = code buffer receiving the operand
 *      fl    = operand kind; selects how `uev` is interpreted
 *      uev   = operand value
 *      flags = CF* flags; FLdatseg and symbol references receive them
 *              OR-ed with CFoffset64
 */
private void do64bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags)
{
    targ_size_t value;                  // literal written by the genp() paths

    assert(I64);

    // Flags passed along with every 8 byte symbol/data segment reference
    const relocFlags = CFoffset64 | flags;

    switch (fl)
    {
        case FLconst:
            value = *cast(targ_size_t *) uev;
            pbuf.genp(8,&value);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,relocFlags);
            break;

        case FLframehandler:
            // Record where the operand lives and write 0 in its place
            framehandleroffset = pbuf.getOffset();
            value = 0;
            pbuf.genp(8,&value);
            return;

        case FLswitch:
            pbuf.flush();
            value = uev.Vswitch.Btableoffset;
            // NOTE(review): CFoff is passed without CFoffset64 although the
            // offset is advanced by 8 below - confirm this is intended
            if (config.flags & CFGromable)
                objmod.reftocodeseg(pbuf.seg,pbuf.offset,value);
            else
                objmod.reftodatseg(pbuf.seg,pbuf.offset,value,objmod.jmpTableSegment(funcsym_p),CFoff);
            break;

        case FLcsdata:
        case FLfardata:
            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from an
            // un-named external which is the start of .rodata or .data
        case FLextern:                      // external data symbol
        case FLtlsdata:
            pbuf.flush();
            objmod.reftoident(pbuf.seg,pbuf.offset,uev.Vsym,uev.Voffset,relocFlags);
            break;

        case FLgotoff:
            // The OSX test must come first, exactly as in the FLgot case
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                assert(0);
            }
            else if (config.exe & EX_posix)
            {
                pbuf.flush();
                objmod.reftoident(pbuf.seg,pbuf.offset,uev.Vsym,uev.Voffset,relocFlags);
                break;
            }
            else
                assert(0);

        case FLgot:
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                // Record the operand's position in the function symbol and
                // write 0 in its place
                funcsym_p.Slocalgotoffset = pbuf.getOffset();
                value = 0;
                pbuf.genp(8,&value);
                return;
            }
            else if (config.exe & EX_posix)
            {
                pbuf.flush();
                objmod.reftoident(pbuf.seg,pbuf.offset,uev.Vsym,uev.Voffset,relocFlags);
                break;
            }
            else
                assert(0);

        case FLfunc:                        // function call
        {
            Symbol *callee = uev.Vsym;
            assert(TARGET_SEGMENTED || !tyfarfunc(callee.ty()));
            pbuf.flush();
            objmod.reftoident(pbuf.seg,pbuf.offset,callee,0,relocFlags);
            break;
        }

        case FLblock:                       // displacement to another block
            // NOTE(review): the bias is 4 although 8 bytes are written - confirm
            value = uev.Vblock.Boffset - pbuf.getOffset() - 4;
            pbuf.genp(8,&value);
            return;

        case FLblockoff:
            pbuf.flush();
            assert(uev.Vblock);
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            break;

        default:
            WRFL(fl);
            assert(0);
    }

    // Only the objmod reference paths reach here; account for their 8 bytes
    pbuf.offset += 8;
}
7337 
7338 
/**
 * Emit the 4-byte operand of an instruction.
 *
 * Literal values are written straight into the code buffer through `genp`.
 * Every other operand kind flushes the buffer, hands the reference to
 * `objmod` at the current offset, and then advances `pbuf.offset` so that
 * the reference is accounted for.
 *
 * Params:
 *      pbuf  = code buffer receiving the operand
 *      fl    = operand kind; selects how `uev` is interpreted
 *      uev   = operand value
 *      flags = CF* flags forwarded with symbol and data segment references
 *      val   = bias added to the offset of symbol references; for Win64
 *              CFpc32 references it is encoded into the CFREL bits of
 *              `flags` instead
 */
private void do32bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags, int val)
{
    targ_size_t disp;                   // literal written by the genp() paths

    //printf("do32bit(flags = x%x)\n", flags);
    switch (fl)
    {
        case FLconst:
            assert(targ_size_t.sizeof == 4 || targ_size_t.sizeof == 8);
            disp = * cast(targ_size_t *) uev;
            pbuf.genp(4,&disp);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags);
            break;

        case FLframehandler:
            // Record where the operand lives and write 0 in its place
            framehandleroffset = pbuf.getOffset();
            disp = 0;
            pbuf.genp(4,&disp);
            return;

        case FLswitch:
            pbuf.flush();
            disp = uev.Vswitch.Btableoffset;

            if (!(config.flags & CFGromable))
            {
                // Jump table is in its own data segment
                objmod.reftodatseg(pbuf.seg,pbuf.offset,disp,objmod.jmpTableSegment(funcsym_p),CFoff);
                break;
            }

            if (config.exe & (EX_OSX | EX_OSX64))
            {
                // These are magic values based on the exact code generated for the switch jump
                uev.Vswitch.Btablebase = I64 ? pbuf.getOffset() + 4
                                             : pbuf.getOffset() + 4 - 8;
                disp -= uev.Vswitch.Btablebase;
                pbuf.genp(4,&disp);
                return;
            }

            if ((config.exe & EX_windos) && I64)
            {
                // Table offset is written relative to the recorded table base
                uev.Vswitch.Btablebase = pbuf.getOffset() + 4;
                disp -= uev.Vswitch.Btablebase;
                pbuf.genp(4,&disp);
                return;
            }

            // Everything else: reference to the table in the code segment
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,disp);
            break;

        case FLcode:
            //assert(JMPJMPTABLE);            // the only use case
            pbuf.flush();
            disp = *cast(targ_size_t *) uev + pbuf.getOffset();
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,disp);
            break;

        case FLcsdata:
        case FLfardata:
            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from an
            // un-named external which is the start of .rodata or .data
        case FLextern:                      // external data symbol
        case FLtlsdata:
        {
            pbuf.flush();
            Symbol *sym = uev.Vsym;
            const bool win64PcRel = (config.exe & EX_windos) && I64 && (flags & CFpc32);
            if (win64PcRel)
            {
                /* This is for those funky fixups where the location to be fixed up
                 * is a 'val' amount back from the current RIP, biased by adding 4.
                 */
                assert(val >= -5 && val <= 0);
                flags |= (-val & 7) << 24;          // set CFREL value
                assert(CFREL == (7 << 24));
                objmod.reftoident(pbuf.seg,pbuf.offset,sym,uev.Voffset,flags);
            }
            else
                objmod.reftoident(pbuf.seg,pbuf.offset,sym,uev.Voffset + val,flags);
            break;
        }

        case FLgotoff:
            // The OSX test must come first, exactly as in the FLgot case
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                assert(0);
            }
            else if (config.exe & EX_posix)
            {
                pbuf.flush();
                objmod.reftoident(pbuf.seg,pbuf.offset,uev.Vsym,uev.Voffset + val,flags);
                break;
            }
            else
                assert(0);

        case FLgot:
            if (config.exe & (EX_OSX | EX_OSX64))
            {
                // Record the operand's position in the function symbol and
                // write 0 in its place
                funcsym_p.Slocalgotoffset = pbuf.getOffset();
                disp = 0;
                pbuf.genp(4,&disp);
                return;
            }
            else if (config.exe & EX_posix)
            {
                pbuf.flush();
                objmod.reftoident(pbuf.seg,pbuf.offset,uev.Vsym,uev.Voffset + val,flags);
                break;
            }
            else
                assert(0);

        case FLfunc:                        // function call
        {
            Symbol *callee = uev.Vsym;

            if (tyfarfunc(callee.ty()))
            {
                /* Large code references are always absolute.
                 * The value returned by reftoident is used as the size; 4 is
                 * taken off here because 4 is added back after the switch.
                 */
                pbuf.flush();
                pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,callee,0,flags) - 4;
                break;
            }

            const bool knownSelfRel =
                callee.Sseg == pbuf.seg &&
                (callee.Sclass == SCstatic || callee.Sclass == SCglobal) &&
                callee.Sxtrnnum == 0 &&
                (flags & CFselfrel);
            if (knownSelfRel)
            {
                // Relative address is already known, write it directly
                disp = callee.Soffset - pbuf.getOffset() - 4;
                pbuf.genp(4,&disp);
                return;
            }

            assert(TARGET_SEGMENTED || !tyfarfunc(callee.ty()));
            pbuf.flush();
            objmod.reftoident(pbuf.seg,pbuf.offset,callee,val,flags);
            break;
        }

        case FLblock:                       // displacement to another block
            disp = uev.Vblock.Boffset - pbuf.getOffset() - 4;
            //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad);
            pbuf.genp(4,&disp);
            return;

        case FLblockoff:
            pbuf.flush();
            assert(uev.Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset);
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            break;

        default:
            WRFL(fl);
            assert(0);
    }

    // Only the objmod reference paths reach here; account for their 4 bytes
    pbuf.offset += 4;
}
7504 
7505 
/**
 * Emit the 2-byte operand of an instruction.
 *
 * Constants and already-known displacements are written straight into the
 * code buffer through `genp`. Every other operand kind flushes the buffer,
 * hands the reference to `objmod` at the current offset, and then advances
 * `pbuf.offset` so that the reference is accounted for.
 *
 * Params:
 *      pbuf  = code buffer receiving the operand
 *      fl    = operand kind; selects how `uev` is interpreted
 *      uev   = operand value
 *      flags = CF* flags forwarded with symbol and data segment references
 */
private void do16bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags)
{
    targ_size_t disp;                   // literal written by the genp() paths

    switch (fl)
    {
        case FLconst:
            // The first two bytes of the operand value are the constant
            pbuf.genp(2,cast(char *) uev);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags);
            break;

        case FLswitch:
            pbuf.flush();
            disp = uev.Vswitch.Btableoffset;
            if (config.flags & CFGromable)
                objmod.reftocodeseg(pbuf.seg,pbuf.offset,disp);
            else
                objmod.reftodatseg(pbuf.seg,pbuf.offset,disp,objmod.jmpTableSegment(funcsym_p),CFoff);
            break;

        case FLcsdata:
        case FLfardata:
        case FLextern:                      // external data symbol
        case FLtlsdata:
            //assert(SIXTEENBIT || TARGET_SEGMENTED);
            pbuf.flush();
            objmod.reftoident(pbuf.seg,pbuf.offset,uev.Vsym,uev.Voffset,flags);
            break;

        case FLfunc:                        // function call
        {
            //assert(SIXTEENBIT || TARGET_SEGMENTED);
            Symbol *callee = uev.Vsym;

            if (tyfarfunc(callee.ty()))
            {
                /* Large code references are always absolute.
                 * The value returned by reftoident is used as the size; 2 is
                 * taken off here because 2 is added back after the switch.
                 */
                pbuf.flush();
                pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,callee,0,flags) - 2;
                break;
            }

            const bool knownSelfRel =
                callee.Sseg == pbuf.seg &&
                (callee.Sclass == SCstatic || callee.Sclass == SCglobal) &&
                callee.Sxtrnnum == 0 &&
                (flags & CFselfrel);
            if (knownSelfRel)
            {
                // Relative address is already known, write it directly
                disp = callee.Soffset - pbuf.getOffset() - 2;
                pbuf.genp(2,&disp);
                return;
            }

            pbuf.flush();
            objmod.reftoident(pbuf.seg,pbuf.offset,callee,0,flags);
            break;
        }

        case FLblock:                       // displacement to another block
            disp = uev.Vblock.Boffset - pbuf.getOffset() - 2;
            debug
            {
                // The displacement must be representable in 16 bits
                targ_ptrdiff_t delta = uev.Vblock.Boffset - pbuf.getOffset() - 2;
                assert(cast(short)delta == delta);
            }
            pbuf.genp(2,&disp);
            return;

        case FLblockoff:
            pbuf.flush();
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            break;

        default:
            WRFL(fl);
            assert(0);
    }

    // Only the objmod reference paths reach here; account for their 2 bytes
    pbuf.offset += 2;
}
7586 
7587 
/**
 * Emit the 1-byte operand of an instruction into the code buffer.
 *
 * Only two operand kinds are accepted: FLconst, whose value is truncated to
 * one byte, and FLblock, whose displacement to the target block must fit in
 * a signed byte. Any other kind trips `assert(0)`.
 *
 * Params:
 *      pbuf = code buffer receiving the operand
 *      fl   = operand kind; selects how `uev` is interpreted
 *      uev  = operand value
 */
private void do8bit(MiniCodeBuf *pbuf, FL fl, evc *uev)
{
    char operand;

    if (fl == FLconst)
    {
        operand = cast(char)uev.Vuns;
    }
    else if (fl == FLblock)
    {
        // Displacement measured from the end of this 1 byte operand
        targ_ptrdiff_t disp = uev.Vblock.Boffset - pbuf.getOffset() - 1;

        const bool fitsInByte = cast(byte)disp == disp;
        if (!fitsInByte)
        {
            // Report the source position of the target block when it is known
            version (MARS)
            {
                if (uev.Vblock.Bsrcpos.Slinnum)
                    printf("%s(%d): ", uev.Vblock.Bsrcpos.Sfilename, uev.Vblock.Bsrcpos.Slinnum);
            }
            printf("block displacement of %lld exceeds the maximum offset of -128 to 127.\n", cast(long)disp);
            err_exit();
        }

        operand = cast(char)disp;
        debug assert(uev.Vblock.Boffset > pbuf.getOffset() || operand != 0x7F);
    }
    else
    {
        debug printf("fl = %d\n",fl);
        assert(0);
    }

    pbuf.gen(operand);
}
7621 
7622 
7623 /**********************************
7624  */
7625 
7626 version (SCPP)
7627 {
7628 static if (HYDRATE)
7629 {
/**
 * Hydrate every instruction in a linked list of `code` nodes.
 *
 * Walks the list starting at `*pc`. Each link pointer is passed through
 * ph_hydrate(), then whatever pointer-bearing operand data the instruction
 * carries (source position, ctor/dtor elem, inline asm bytes, symbols,
 * code or block references) is hydrated as well.
 * NOTE(review): presumably part of SCPP precompiled-header support, where
 * stored pointers must be converted back to live ones -- confirm against
 * ph_hydrate().
 *
 * Params:
 *      pc = address of the pointer to the first instruction; must not be
 *           null (asserted). The pointed-to pointer may be null, in which
 *           case nothing is done.
 */
void code_hydrate(code **pc)
{
    code *c;
    ubyte ins,rm;
    FL fl;

    assert(pc);
    while (*pc)
    {
        // Hydrate the link itself; the result is the instruction to process.
        c = cast(code *) ph_hydrate(cast(void**)pc);

        // Look up the instruction attribute flags (M, T, ...) for this opcode,
        // choosing the table according to how the opcode is encoded.
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((c.Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c.Iop >> 8) & 0xFF];
        else if ((c.Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c.Iop & 0xFF];
        else
            ins = inssize[c.Iop & 0xFF];

        // Pseudo-instructions keep their payload in IEV1 and have no
        // regular operands, so they are handled here and skip the rest.
        switch (c.Iop)
        {
            default:
                break;

            case ESCAPE | ESClinnum:
                srcpos_hydrate(&c.IEV1.Vsrcpos);
                goto done;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_hydrate(&c.IEV1.Vtor);
                goto done;

            case ASM:
                ph_hydrate(cast(void**)&c.IEV1.bytes);
                goto done;
        }

        // The first operand (IEV1) is only examined when the instruction has
        // the M attribute and its rm byte does not have both top bits set.
        if (!(ins & M) ||
            ((rm = c.Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c.Iflags))
        {

            // 32 bit addressing: with the top two rm bits clear, only
            // rm&7 == 5, or rm&7 == 4 with sib&7 == 5, is treated as
            // having a first operand.
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            // 16 bit addressing: with the top two rm bits clear, only
            // rm&7 == 6 is treated as having a first operand.
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }

        // Hydrate IEV1 according to the kind of thing IFL1 says it holds.
        fl = cast(FL) c.IFL1;
        switch (fl)
        {
            // Kinds whose operand is a Symbol pointer.
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_hydrate(&c.IEV1.Vsym);
                symbol_debug(c.IEV1.Vsym);
                break;

            // Kinds for which nothing is hydrated.
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_hydrate(cast(void**)&c.IEV1.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_hydrate(cast(void**)&c.IEV1.Vblock);
                break;
version (SCPP)
{
            case FLctor:
            case FLdtor:
                el_hydrate(cast(elem**)&c.IEV1.Vtor);
                break;
}
            case FLasm:
                ph_hydrate(cast(void**)&c.IEV1.bytes);
                break;

            default:
                // Unexpected operand kind: print it and stop.
                WRFL(fl);
                assert(0);
        }
    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        // The second operand (IEV2) is only examined when the instruction
        // has the T attribute.
        if (!(ins & T))
            goto done;          /* if no second operand */

        // Hydrate IEV2 according to IFL2, mirroring the IEV1 handling above
        // (there are no ctor/dtor/asm cases for the second operand).
        fl = cast(FL) c.IFL2;
        switch (fl)
        {
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_hydrate(&c.IEV2.Vsym);
                symbol_debug(c.IEV2.Vsym);
                break;

            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_hydrate(cast(void**)&c.IEV2.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_hydrate(cast(void**)&c.IEV2.Vblock);
                break;

            default:
                WRFL(fl);
                assert(0);
        }
  done:
        { }

        // Continue with the link stored in the instruction just processed.
        pc = &c.next;
    }
}
7797 }
7798 
7799 /**********************************
7800  */
7801 
7802 static if (DEHYDRATE)
7803 {
/**
 * Dehydrate every instruction in a linked list of `code` nodes.
 *
 * Counterpart of code_hydrate(): walks the list starting at `*pc`, passes
 * each link pointer to ph_dehydrate(), and dehydrates the pointer-bearing
 * operand data of each instruction (source position, ctor/dtor elem,
 * inline asm bytes, symbols, code or block references).
 * NOTE(review): presumably part of SCPP precompiled-header support --
 * confirm against ph_dehydrate().
 *
 * Params:
 *      pc = address of the pointer to the first instruction. The walk
 *           stops at the first null link.
 */
void code_dehydrate(code **pc)
{
    code *c;
    ubyte ins,rm;
    FL fl;

    // `c` is captured before the link is dehydrated, and all further access
    // to the instruction goes through `c` rather than through `*pc`.
    while ((c = *pc) != null)
    {
        ph_dehydrate(pc);

        // Look up the instruction attribute flags (M, T, ...) for this opcode,
        // choosing the table according to how the opcode is encoded.
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((c.Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c.Iop >> 8) & 0xFF];
        else if ((c.Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c.Iop & 0xFF];
        else
            ins = inssize[c.Iop & 0xFF];

        // Pseudo-instructions keep their payload in IEV1 and have no
        // regular operands, so they are handled here and skip the rest.
        switch (c.Iop)
        {
            default:
                break;

            case ESCAPE | ESClinnum:
                srcpos_dehydrate(&c.IEV1.Vsrcpos);
                goto done;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_dehydrate(&c.IEV1.Vtor);
                goto done;

            case ASM:
                ph_dehydrate(&c.IEV1.bytes);
                goto done;
        }

        // The first operand (IEV1) is only examined when the instruction has
        // the M attribute and its rm byte does not have both top bits set.
        if (!(ins & M) ||
            ((rm = c.Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c.Iflags))
        {

            // 32 bit addressing: with the top two rm bits clear, only
            // rm&7 == 5, or rm&7 == 4 with sib&7 == 5, is treated as
            // having a first operand.
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            // 16 bit addressing: with the top two rm bits clear, only
            // rm&7 == 6 is treated as having a first operand.
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }

        // Dehydrate IEV1 according to the kind of thing IFL1 says it holds.
        fl = cast(FL) c.IFL1;
        switch (fl)
        {
            // Kinds whose operand is a Symbol pointer.
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_dehydrate(&c.IEV1.Vsym);
                break;

            // Kinds for which nothing is dehydrated.
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_dehydrate(&c.IEV1.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_dehydrate(&c.IEV1.Vblock);
                break;
version (SCPP)
{
            case FLctor:
            case FLdtor:
                el_dehydrate(&c.IEV1.Vtor);
                break;
}
            case FLasm:
                ph_dehydrate(&c.IEV1.bytes);
                break;

            default:
                // Unexpected operand kind: print it and stop.
                WRFL(fl);
                assert(0);
        }
    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        // The second operand (IEV2) is only examined when the instruction
        // has the T attribute.
        if (!(ins & T))
            goto done;          /* if no second operand */

        // Dehydrate IEV2 according to IFL2, mirroring the IEV1 handling above
        // (there are no ctor/dtor/asm cases for the second operand).
        fl = cast(FL) c.IFL2;
        switch (fl)
        {
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_dehydrate(&c.IEV2.Vsym);
                break;

            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_dehydrate(&c.IEV2.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_dehydrate(&c.IEV2.Vblock);
                break;

            default:
                WRFL(fl);
                assert(0);
        }
  done:
        // Advance through the link field itself (as code_hydrate does), so
        // this does not depend on code_next() returning an lvalue.
        pc = &c.next;
    }
}
7970 }
7971 }
7972 
7973 /***************************
7974  * Debug code to dump code structure.
7975  */
7976 
7977 void WRcodlst(code *c)
7978 {
7979     for (; c; c = code_next(c))
7980         code_print(c);
7981 }
7982 
/***************************
 * Debug helper: print a one-line description of a single instruction.
 *
 * The line contains the node address and its successor, any VEX/REX prefix
 * information, the opcode, the flags (if non-zero), the rm/sib bytes and a
 * summary of the first and second operands where the instruction has them.
 *
 * Params:
 *      c = instruction to print; if null, "code 0" is printed
 */
extern (C) void code_print(code* c)
{
    ubyte ins;
    ubyte rexb;

    if (c == null)
    {
        printf("code 0\n");
        return;
    }

    // Look up the instruction attribute flags (M, T, ...) for this opcode,
    // choosing the table according to how the opcode is encoded.
    const op = c.Iop;
    if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
        ins = vex_inssize(c);
    else if ((c.Iop & 0xFFFD00) == 0x0F3800)
        ins = inssize2[(op >> 8) & 0xFF];
    else if ((c.Iop & 0xFF00) == 0x0F00)
        ins = inssize2[op & 0xFF];
    else
        ins = inssize[op & 0xFF];

    printf("code %p: nxt=%p ",c,code_next(c));

    // For VEX instructions print the prefix bytes and synthesize the
    // equivalent REX bits from the VEX fields (r, x and b are tested
    // inverted); otherwise use the REX byte stored in the instruction.
    if (c.Iflags & CFvex)
    {
        if (c.Iflags & CFvex3)
        {
            printf("vex=0xC4");
            printf(" 0x%02X", VEX3_B1(c.Ivex));
            printf(" 0x%02X", VEX3_B2(c.Ivex));
            rexb =
                ( c.Ivex.w ? REX_W : 0) |
                (!c.Ivex.r ? REX_R : 0) |
                (!c.Ivex.x ? REX_X : 0) |
                (!c.Ivex.b ? REX_B : 0);
        }
        else
        {
            printf("vex=0xC5");
            printf(" 0x%02X", VEX2_B1(c.Ivex));
            rexb = !c.Ivex.r ? REX_R : 0;
        }
        printf(" ");
    }
    else
        rexb = c.Irex;

    if (rexb)
    {
        // Print the same value that the W/R/X/B letters below decode. For a
        // VEX instruction that is the value synthesized above, not c.Irex.
        printf("rex=0x%02X ", rexb);
        if (rexb & REX_W)
            printf("W");
        if (rexb & REX_R)
            printf("R");
        if (rexb & REX_X)
            printf("X");
        if (rexb & REX_B)
            printf("B");
        printf(" ");
    }
    printf("op=0x%02X",op);

    // ESCAPE pseudo-ops: a line number record is printed on its own and
    // ends the line; other escapes print their sub-code and continue.
    if ((op & 0xFF) == ESCAPE)
    {
        if ((op & 0xFF00) == ESClinnum)
        {
            printf(" linnum = %d\n",c.IEV1.Vsrcpos.Slinnum);
            return;
        }
        printf(" ESCAPE %d",c.Iop >> 8);
    }
    if (c.Iflags)
        printf(" flg=%x",c.Iflags);

    // Instructions with the M attribute: show rm split into its 2/3/3 bit
    // fields, the sib byte when there is one, and the first operand.
    if (ins & M)
    {
        uint rm = c.Irm;
        printf(" rm=0x%02X=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7);
        if (!I16 && issib(rm))
        {
            ubyte sib = c.Isib;
            printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7);
        }
        // The first operand is only printed when rm matches BPRM (ignoring
        // the middle field) or its top two bits are 01 or 10.
        if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40)
        {
            switch (c.IFL1)
            {
                case FLconst:
                case FLoffset:
                    printf(" int = %4d",c.IEV1.Vuns);
                    break;

                case FLblock:
                    printf(" block = %p",c.IEV1.Vblock);
                    break;

                // Nothing extra is printed for these kinds.
                case FLswitch:
                case FLblockoff:
                case FLlocalsize:
                case FLframehandler:
                case 0:
                    break;

                case FLdatseg:
                    printf(" FLdatseg %d.%llx",c.IEV1.Vseg,cast(ulong)c.IEV1.Vpointer);
                    break;

                // Symbol operands: kind, identifier and non-zero offset.
                case FLauto:
                case FLfast:
                case FLreg:
                case FLdata:
                case FLudata:
                case FLpara:
                case FLbprel:
                case FLtlsdata:
                case FLextern:
                    printf(" ");
                    WRFL(cast(FL)c.IFL1);
                    printf(" sym='%s'",c.IEV1.Vsym.Sident.ptr);
                    if (c.IEV1.Voffset)
                        printf(".%d", cast(int)c.IEV1.Voffset);
                    break;

                default:
                    WRFL(cast(FL)c.IFL1);
                    break;
            }
        }
    }

    // Instructions with the T attribute: show the kind of the second
    // operand, followed by its value where one is printed.
    if (ins & T)
    {
        printf(" ");
        WRFL(cast(FL)c.IFL2);
        switch (c.IFL2)
        {
            case FLconst:
                printf(" int = %4d",c.IEV2.Vuns);
                break;

            case FLblock:
                printf(" block = %p",c.IEV2.Vblock);
                break;

            // Nothing extra is printed for these kinds.
            case FLswitch:
            case FLblockoff:
            case 0:
            case FLlocalsize:
            case FLframehandler:
                break;

            case FLdatseg:
                printf(" %d.%llx",c.IEV2.Vseg,cast(ulong)c.IEV2.Vpointer);
                break;

            case FLauto:
            case FLfast:
            case FLreg:
            case FLpara:
            case FLbprel:
            case FLfunc:
            case FLdata:
            case FLudata:
            case FLtlsdata:
                printf(" sym='%s'",c.IEV2.Vsym.Sident.ptr);
                break;

            case FLcode:
                printf(" code = %p",c.IEV2.Vcode);
                break;

            default:
                WRFL(cast(FL)c.IFL2);
                break;
        }
    }
    printf("\n");
}
8159 
8160 }