/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1994-1998 by Symantec
 *              Copyright (C) 2000-2021 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod3.d, backend/cod3.d)
 * Documentation:  https://dlang.org/phobos/dmd_backend_cod3.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod3.d
 */

module dmd.backend.cod3;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.cgcse;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.dlist;
import dmd.backend.dvec;
import dmd.backend.melf;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.outbuf;
import dmd.backend.rtlsym;
import dmd.backend.symtab;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

version (SCPP)
{
    import parser;
    import precomp;
}

extern (C++):

nothrow:

version (MARS)
    enum MARS = true;
else
    enum MARS = false;

int REGSIZE();

extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl, flinsymtab;

private extern (D) uint mask(uint m) { return 1 << m; }

//private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); }

extern __gshared targ_size_t retsize;

enum JMPJMPTABLE = false;           // benchmarking shows it's slower

enum MINLL = 0x8000_0000_0000_0000L;
enum MAXLL = 0x7FFF_FFFF_FFFF_FFFFL;
/*************
 * Size in bytes of each instruction.
 * 0 means illegal instruction.
 * bit  M:      if there is a modregrm field (EV1 is reserved for modregrm)
 * bit  T:      if there is a second operand (EV2)
 * bit  E:      if second operand is only 8 bits
 * bit  A:      a short version exists for the AX reg
 * bit  R:      a short version exists for regs
 * bits 2..0:   size of instruction (excluding optional bytes)
 */

enum
{
    M = 0x80,
    T = 0x40,
    E = 0x20,
    A = 0x10,
    R = 0x08,
    W = 0,
}

private __gshared ubyte[256] inssize =
[       M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 00 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 08 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 10 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 18 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 20 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 28 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 30 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 38 */
        1,1,1,1,                1,1,1,1,                /* 40 */
        1,1,1,1,                1,1,1,1,                /* 48 */
        1,1,1,1,                1,1,1,1,                /* 50 */
        1,1,1,1,                1,1,1,1,                /* 58 */
        1,1,M|2,M|2,            1,1,1,1,                /* 60 */
        T|3,M|T|4,T|E|2,M|T|E|3, 1,1,1,1,               /* 68 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 70 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 78 */
        M|T|E|A|3,M|T|A|4,M|T|E|3,M|T|E|3, M|2,M|2,M|2,M|A|R|2, /* 80 */
        M|A|2,M|A|2,M|A|2,M|A|2, M|2,M|2,M|2,M|R|2,     /* 88 */
        1,1,1,1,                1,1,1,1,                /* 90 */
        1,1,T|5,1,              1,1,1,1,                /* 98 */

        // cod3_set32() patches this
        //T|5,T|5,T|5,T|5,      1,1,1,1,                /* A0 */
        T|3,T|3,T|3,T|3,        1,1,1,1,                /* A0 */

        T|E|2,T|3,1,1,          1,1,1,1,                /* A8 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* B0 */
        T|3,T|3,T|3,T|3,        T|3,T|3,T|3,T|3,        /* B8 */
        M|T|E|3,M|T|E|3,T|3,1,  M|2,M|2,M|T|E|R|3,M|T|R|4, /* C0 */
        T|E|4,1,T|3,1,          1,T|E|2,1,1,            /* C8 */
        M|2,M|2,M|2,M|2,        T|E|2,T|E|2,0,1,        /* D0 */
        /* For the floating instructions, allow room for the FWAIT */
        M|2,M|2,M|2,M|2,        M|2,M|2,M|2,M|2,        /* D8 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* E0 */
        T|3,T|3,T|5,T|E|2,      1,1,1,1,                /* E8 */
        1,0,1,1,                1,1,M|A|2,M|A|2,        /* F0 */
        1,1,1,1,                1,1,M|2,M|R|2           /* F8 */
];

private __gshared const ubyte[256] inssize32 =
[       2,2,2,2,        2,5,1,1,                /* 00 */
        2,2,2,2,        2,5,1,1,                /* 08 */
        2,2,2,2,        2,5,1,1,                /* 10 */
        2,2,2,2,        2,5,1,1,                /* 18 */
        2,2,2,2,        2,5,1,1,                /* 20 */
        2,2,2,2,        2,5,1,1,                /* 28 */
        2,2,2,2,        2,5,1,1,                /* 30 */
        2,2,2,2,        2,5,1,1,                /* 38 */
        1,1,1,1,        1,1,1,1,                /* 40 */
        1,1,1,1,        1,1,1,1,                /* 48 */
        1,1,1,1,        1,1,1,1,                /* 50 */
        1,1,1,1,        1,1,1,1,                /* 58 */
        1,1,2,2,        1,1,1,1,                /* 60 */
        5,6,2,3,        1,1,1,1,                /* 68 */
        2,2,2,2,        2,2,2,2,                /* 70 */
        2,2,2,2,        2,2,2,2,                /* 78 */
        3,6,3,3,        2,2,2,2,                /* 80 */
        2,2,2,2,        2,2,2,2,                /* 88 */
        1,1,1,1,        1,1,1,1,                /* 90 */
        1,1,7,1,        1,1,1,1,                /* 98 */
        5,5,5,5,        1,1,1,1,                /* A0 */
        2,5,1,1,        1,1,1,1,                /* A8 */
        2,2,2,2,        2,2,2,2,                /* B0 */
        5,5,5,5,        5,5,5,5,                /* B8 */
        3,3,3,1,        2,2,3,6,                /* C0 */
        4,1,3,1,        1,2,1,1,                /* C8 */
        2,2,2,2,        2,2,0,1,                /* D0 */
        /* For the floating instructions, don't need room for the FWAIT */
        2,2,2,2,        2,2,2,2,                /* D8 */

        2,2,2,2,        2,2,2,2,                /* E0 */
        5,5,7,2,        1,1,1,1,                /* E8 */
        1,0,1,1,        1,1,2,2,                /* F0 */
        1,1,1,1,        1,1,2,2                 /* F8 */
];
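/* Example of reading the tables above: for opcode 0x81 (the ADD/SUB/CMP
 * r/m,imm group) the entry is M|T|A|4, i.e. it has a modregrm byte (M), an
 * immediate second operand (T), a short accumulator form (A), and a base size
 * of 4 bytes in 16-bit code (opcode + modregrm + imm16); inssize32[0x81] == 6
 * is the corresponding 32-bit size, where the immediate widens to 4 bytes.
 */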
/* For 2 byte opcodes starting with 0x0F */
private __gshared ubyte[256] inssize2 =
[       M|3,M|3,M|3,M|3,        2,2,2,2,                // 00
        2,2,M|3,2,              2,M|3,2,M|T|E|4,        // 08
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 10
        M|3,2,2,2,              2,2,2,2,                // 18
        M|3,M|3,M|3,M|3,        M|3,2,M|3,2,            // 20
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 28
        2,2,2,2,                2,2,2,2,                // 30
        M|4,2,M|T|E|5,2,        2,2,2,2,                // 38
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 40
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 48
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 50
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 58
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 60
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 68
        M|T|E|4,M|T|E|4,M|T|E|4,M|T|E|4, M|3,M|3,M|3,2, // 70
        2,2,2,2,                M|3,M|3,M|3,M|3,        // 78
        W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 80
        W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 88
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 90
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 98
        2,2,2,M|3,              M|T|E|4,M|3,2,2,        // A0
        2,2,2,M|3,              M|T|E|4,M|3,M|3,M|3,    // A8
        M|E|3,M|3,M|3,M|3,      M|3,M|3,M|3,M|3,        // B0
        M|3,2,M|T|E|4,M|3,      M|3,M|3,M|3,M|3,        // B8
        M|3,M|3,M|T|E|4,M|3,    M|T|E|4,M|T|E|4,M|T|E|4,M|3, // C0
        2,2,2,2,                2,2,2,2,                // C8
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // D0
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // D8
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // E0
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // E8
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // F0
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,2           // F8
];

/*************************************************
 * Generate code to save `reg` in `regsave` stack area.
 * Params:
 *      regsave = register save area on stack
 *      cdb = where to write generated code
 *      reg = register to save
 *      idx = set to location in regsave for use in REGSAVE_restore()
 */

void REGSAVE_save(ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, out uint idx)
{
    if (isXMMreg(reg))
    {
        regsave.alignment = 16;
        regsave.idx = (regsave.idx + 15) & ~15;
        idx = regsave.idx;
        regsave.idx += 16;
        // MOVD idx[RBP],xmm
        opcode_t op = STOAPD;
        if (TARGET_LINUX && I32)
            // Haven't yet figured out why stack is not aligned to 16
            op = STOUPD;
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLregsave,cast(targ_uns) idx);
    }
    else
    {
        if (!regsave.alignment)
            regsave.alignment = REGSIZE;
        idx = regsave.idx;
        regsave.idx += REGSIZE;
        // MOV idx[RBP],reg
        cdb.genc1(0x89,modregxrm(2, reg, BPRM),FLregsave,cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
    reflocal = true;
    if (regsave.idx > regsave.top)
        regsave.top = regsave.idx;              // keep high water mark
}

/*******************************
 * Restore `reg` from `regsave` area.
 * Complement REGSAVE_save().
 */

void REGSAVE_restore(const ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, uint idx)
{
    if (isXMMreg(reg))
    {
        assert(regsave.alignment == 16);
        // MOVD xmm,idx[RBP]
        opcode_t op = LODAPD;
        if (TARGET_LINUX && I32)
            // Haven't yet figured out why stack is not aligned to 16
            op = LODUPD;
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLregsave,cast(targ_uns) idx);
    }
    else
    {   // MOV reg,idx[RBP]
        cdb.genc1(0x8B,modregxrm(2, reg, BPRM),FLregsave,cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
}
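/* Sketch of how the two functions above pair up: REGSAVE_save() hands back an
 * `idx` that must be passed unchanged to REGSAVE_restore(), e.g.
 *
 *      uint idx;
 *      REGSAVE_save(regsave, cdb, reg, idx);     // spill reg to the save area
 *      ...emit code that clobbers reg...
 *      REGSAVE_restore(regsave, cdb, reg, idx);  // reload reg from the same slot
 */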
/************************************
 * Size for vex encoded instruction.
 */

ubyte vex_inssize(code *c)
{
    assert(c.Iflags & CFvex && c.Ivex.pfx == 0xC4);
    ubyte ins;
    if (c.Iflags & CFvex3)
    {
        switch (c.Ivex.mmmm)
        {
            case 0: // no prefix
            case 1: // 0F
                ins = cast(ubyte)(inssize2[c.Ivex.op] + 2);
                break;
            case 2: // 0F 38
                ins = cast(ubyte)(inssize2[0x38] + 1);
                break;
            case 3: // 0F 3A
                ins = cast(ubyte)(inssize2[0x3A] + 1);
                break;
            default:
                printf("Iop = %x mmmm = %x\n", c.Iop, c.Ivex.mmmm);
                assert(0);
        }
    }
    else
    {
        ins = cast(ubyte)(inssize2[c.Ivex.op] + 1);
    }
    return ins;
}

/************************************
 * Determine if there is a modregrm byte for code.
 */

int cod3_EA(code *c)
{   uint ins;

    opcode_t op1 = c.Iop & 0xFF;
    if (op1 == ESCAPE)
        ins = 0;
    else if ((c.Iop & 0xFFFD00) == 0x0F3800)
        ins = inssize2[(c.Iop >> 8) & 0xFF];
    else if ((c.Iop & 0xFF00) == 0x0F00)
        ins = inssize2[op1];
    else
        ins = inssize[op1];
    return ins & M;
}

/********************************
 * setup ALLREGS and BYTEREGS
 * called by: codgen
 */

void cod3_initregs()
{
    if (I64)
    {
        ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
        BYTEREGS = ALLREGS;
    }
    else
    {
        ALLREGS = ALLREGS_INIT;
        BYTEREGS = BYTEREGS_INIT;
    }
}

/********************************
 * set initial global variable values
 */

void cod3_setdefault()
{
    fregsaved = mBP | mSI | mDI;
}

/********************************
 * Fix global variables for 386.
 */

void cod3_set32()
{
    inssize[0xA0] = T|5;
    inssize[0xA1] = T|5;
    inssize[0xA2] = T|5;
    inssize[0xA3] = T|5;
    BPRM = 5;                           /* [EBP] addressing mode        */
    fregsaved = mBP | mBX | mSI | mDI;  // saved across function calls
    FLOATREGS = FLOATREGS_32;
    FLOATREGS2 = FLOATREGS2_32;
    DOUBLEREGS = DOUBLEREGS_32;
    if (config.flags3 & CFG3eseqds)
        fregsaved |= mES;

    foreach (ref v; inssize2[0x80 .. 0x90])
        v = W|T|6;

    TARGET_STACKALIGN = config.fpxmmregs ? 16 : 4;
}

/********************************
 * Fix global variables for I64.
 */

void cod3_set64()
{
    inssize[0xA0] = T|5;                // MOV AL,mem
    inssize[0xA1] = T|5;                // MOV RAX,mem
    inssize[0xA2] = T|5;                // MOV mem,AL
    inssize[0xA3] = T|5;                // MOV mem,RAX
    BPRM = 5;                           // [RBP] addressing mode

    fregsaved = (config.exe & EX_windos)
        ? mBP | mBX | mDI | mSI | mR12 | mR13 | mR14 | mR15 | mES | mXMM6 | mXMM7 // also XMM8..15;
        : mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES;      // saved across function calls

    FLOATREGS = FLOATREGS_64;
    FLOATREGS2 = FLOATREGS2_64;
    DOUBLEREGS = DOUBLEREGS_64;

    ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
    BYTEREGS = ALLREGS;

    foreach (ref v; inssize2[0x80 .. 0x90])
        v = W|T|6;

    TARGET_STACKALIGN = config.fpxmmregs ? 16 : 8;
}
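/* Note: both cod3_set32() and cod3_set64() patch inssize[0xA0 .. 0xA3] to T|5,
 * because the direct-offset MOV AL/AX,mem forms carry an address-sized
 * displacement (see the "cod3_set32() patches this" note in the table above).
 */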
/*********************************
 * Word or dword align start of function.
 * Params:
 *      seg = segment to write alignment bytes to
 *      nbytes = number of alignment bytes to write
 */
void cod3_align_bytes(int seg, size_t nbytes)
{
    /* Table 4-2 from Intel Instruction Set Reference M-Z
     * 1 bytes NOP                                        90
     * 2 bytes 66 NOP                                     66 90
     * 3 bytes NOP DWORD ptr [EAX]                        0F 1F 00
     * 4 bytes NOP DWORD ptr [EAX + 00H]                  0F 1F 40 00
     * 5 bytes NOP DWORD ptr [EAX + EAX*1 + 00H]          0F 1F 44 00 00
     * 6 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00H]       66 0F 1F 44 00 00
     * 7 bytes NOP DWORD ptr [EAX + 00000000H]            0F 1F 80 00 00 00 00
     * 8 bytes NOP DWORD ptr [EAX + EAX*1 + 00000000H]    0F 1F 84 00 00 00 00 00
     * 9 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] 66 0F 1F 84 00 00 00 00 00
     * only for CPUs: CPUID.01H.EAX[Bytes 11:8] = 0110B or 1111B
     */

    assert(SegData[seg].SDseg == seg);

    while (nbytes)
    {   size_t n = nbytes;
        const(char)* p;

        if (nbytes > 1 && (I64 || config.fpxmmregs))
        {
            switch (n)
            {
                case 2:  p = "\x66\x90"; break;
                case 3:  p = "\x0F\x1F\x00"; break;
                case 4:  p = "\x0F\x1F\x40\x00"; break;
                case 5:  p = "\x0F\x1F\x44\x00\x00"; break;
                case 6:  p = "\x66\x0F\x1F\x44\x00\x00"; break;
                case 7:  p = "\x0F\x1F\x80\x00\x00\x00\x00"; break;
                case 8:  p = "\x0F\x1F\x84\x00\x00\x00\x00\x00"; break;
                default: p = "\x66\x0F\x1F\x84\x00\x00\x00\x00\x00"; n = 9; break;
            }
        }
        else
        {
            static immutable ubyte[15] nops = [
                0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
            ]; // XCHG AX,AX
            if (n > nops.length)
                n = nops.length;
            p = cast(char*)nops;
        }
        objmod.write_bytes(SegData[seg],cast(uint)n,cast(char*)p);
        nbytes -= n;
    }
}

/****************************
 * Align start of function.
 * Params:
 *      seg = segment of function
 */
void cod3_align(int seg)
{
    if (config.exe & EX_windos)
    {
        if (config.flags4 & CFG4speed)      // if optimized for speed
        {
            // Pick alignment based on CPU target
            if (config.target_cpu == TARGET_80486 ||
                config.target_cpu >= TARGET_PentiumPro)
            {   // 486 does reads on 16 byte boundaries, so if we are near
                // such a boundary, align us to it

                const nbytes = -Offset(seg) & 15;
                if (nbytes < 8)
                    cod3_align_bytes(seg, nbytes);
            }
        }
    }
    else
    {
        const nbytes = -Offset(seg) & 7;
        cod3_align_bytes(seg, nbytes);
    }
}


/**********************************
 * Generate code to adjust the stack pointer by `nbytes`
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to adjust stack pointer
 */
void cod3_stackadj(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackadj(%d)\n", nbytes);
    uint grex = I64 ? REX_W << 16 : 0;
    uint rm;
    if (nbytes > 0)
        rm = modregrm(3,5,SP); // SUB ESP,nbytes
    else
    {
        nbytes = -nbytes;
        rm = modregrm(3,0,SP); // ADD ESP,nbytes
    }
    cdb.genc2(0x81, grex | rm, nbytes);
}
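/* Note on cod3_stackadj(): a positive `nbytes` emits SUB ESP,nbytes (allocating
 * stack) and a negative value emits ADD ESP,-nbytes (releasing it), so a matched
 * pair such as cod3_stackadj(cdb, nalign) ... cod3_stackadj(cdb, -nalign) leaves
 * ESP where it started, as done in callFinallyBlock() below.
 */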
/**********************************
 * Generate code to align the stack pointer at `nbytes`
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to align stack pointer
 */
void cod3_stackalign(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackalign(%d)\n", nbytes);
    const grex = I64 ? REX_W << 16 : 0;
    const rm = modregrm(3, 4, SP);             // AND ESP,-nbytes
    cdb.genc2(0x81, grex | rm, -nbytes);
}

/* Constructor that links the ModuleReference to the head of
 * the list pointed to by _Dmoduleref
 *
 * For ELF object files.
 */
static if (0)
{
void cod3_buildmodulector(Outbuffer* buf, int codeOffset, int refOffset)
{
    /*      ret
     * codeOffset:
     *      pushad
     *      mov     EAX,&ModuleReference
     *      mov     ECX,_DmoduleRef
     *      mov     EDX,[ECX]
     *      mov     [EAX],EDX
     *      mov     [ECX],EAX
     *      popad
     *      ret
     */

    const int seg = CODE;

    if (I64 && config.flags3 & CFG3pic)
    {   // LEA RAX,ModuleReference[RIP]
        buf.writeByte(REX | REX_W);
        buf.writeByte(LEA);
        buf.writeByte(modregrm(0,AX,5));
        codeOffset += 3;
        codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_PC32, 3 /*STI_DATA*/, refOffset - 4);

        // MOV RCX,_DmoduleRef@GOTPCREL[RIP]
        buf.writeByte(REX | REX_W);
        buf.writeByte(0x8B);
        buf.writeByte(modregrm(0,CX,5));
        codeOffset += 3;
        codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_GOTPCREL, Obj.external_def("_Dmodule_ref"), -4);
    }
    else
    {
        /* movl ModuleReference*, %eax */
        buf.writeByte(0xB8);
        codeOffset += 1;
        const uint reltype = I64 ? R_X86_64_32 : R_386_32;
        codeOffset += Obj.writerel(seg, codeOffset, reltype, 3 /*STI_DATA*/, refOffset);

        /* movl _Dmodule_ref, %ecx */
        buf.writeByte(0xB9);
        codeOffset += 1;
        codeOffset += Obj.writerel(seg, codeOffset, reltype, Obj.external_def("_Dmodule_ref"), 0);
    }

    if (I64)
        buf.writeByte(REX | REX_W);
    buf.writeByte(0x8B); buf.writeByte(0x11); /* movl (%ecx), %edx */
    if (I64)
        buf.writeByte(REX | REX_W);
    buf.writeByte(0x89); buf.writeByte(0x10); /* movl %edx, (%eax) */
    if (I64)
        buf.writeByte(REX | REX_W);
    buf.writeByte(0x89); buf.writeByte(0x01); /* movl %eax, (%ecx) */

    buf.writeByte(0xC3); /* ret */
}
}
/*****************************
 * Given a type, return a mask of
 * registers to hold that type.
 * Params:
 *      tym = type of the value
 *      tyf = function type
 */

regm_t regmask(tym_t tym, tym_t tyf)
{
    switch (tybasic(tym))
    {
        case TYvoid:
        case TYstruct:
        case TYarray:
            return 0;

        case TYbool:
        case TYwchar_t:
        case TYchar16:
        case TYchar:
        case TYschar:
        case TYuchar:
        case TYshort:
        case TYushort:
        case TYint:
        case TYuint:
        case TYnullptr:
        case TYnptr:
        case TYnref:
        case TYsptr:
        case TYcptr:
        case TYimmutPtr:
        case TYsharePtr:
        case TYrestrictPtr:
        case TYfgPtr:
            return mAX;

        case TYfloat:
        case TYifloat:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            goto case TYlong;

        case TYlong:
        case TYulong:
        case TYdchar:
            if (!I16)
                return mAX;
            goto case TYfptr;

        case TYfptr:
        case TYhptr:
            return mDX | mAX;

        case TYcent:
        case TYucent:
            assert(I64);
            return mDX | mAX;

        case TYvptr:
            return mDX | mBX;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            return DOUBLEREGS;

        case TYllong:
        case TYullong:
            return I64 ? cast(regm_t) mAX : (I32 ? mDX | mAX : DOUBLEREGS);

        case TYldouble:
        case TYildouble:
            return mST0;

        case TYcfloat:
            if (config.exe & EX_posix && I32 && tybasic(tyf) == TYnfunc)
                return mDX | mAX;
            goto case TYcdouble;

        case TYcdouble:
            if (I64)
                return mXMM0 | mXMM1;
            goto case TYcldouble;

        case TYcldouble:
            return mST01;

        // SIMD vector types
        case TYfloat4:
        case TYdouble2:
        case TYschar16:
        case TYuchar16:
        case TYshort8:
        case TYushort8:
        case TYlong4:
        case TYulong4:
        case TYllong2:
        case TYullong2:

        case TYfloat8:
        case TYdouble4:
        case TYschar32:
        case TYuchar32:
        case TYshort16:
        case TYushort16:
        case TYlong8:
        case TYulong8:
        case TYllong4:
        case TYullong4:
            if (!config.fpxmmregs)
            {   printf("SIMD operations not supported on this platform\n");
                exit(1);
            }
            return mXMM0;

        default:
            debug WRTYxx(tym);
            assert(0);
    }
}

/*******************************
 * Set up register allocator parameters with platform-specific data.
 */
void cgreg_dst_regs(reg_t* dst_integer_reg, reg_t* dst_float_reg)
{
    *dst_integer_reg = AX;
    *dst_float_reg   = XMM0;
}

void cgreg_set_priorities(tym_t ty, const(reg_t)** pseq, const(reg_t)** pseqmsw)
{
    const sz = tysize(ty);

    if (tyxmmreg(ty))
    {
        static immutable ubyte[9] sequence = [XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,NOREG];
        *pseq = sequence.ptr;
    }
    else if (I64)
    {
        if (sz == REGSIZE * 2)
        {
            static immutable ubyte[3] seqmsw1 = [CX,DX,NOREG];
            static immutable ubyte[5] seqlsw1 = [AX,BX,SI,DI,NOREG];
            *pseq = seqlsw1.ptr;
            *pseqmsw = seqmsw1.ptr;
        }
        else
        {   // R10 is reserved for the static link
            static immutable ubyte[15] sequence2 = [AX,CX,DX,SI,DI,R8,R9,R11,BX,R12,R13,R14,R15,BP,NOREG];
            *pseq = cast(ubyte*)sequence2.ptr;
        }
    }
    else if (I32)
    {
        if (sz == REGSIZE * 2)
        {
            static immutable ubyte[5] seqlsw3 = [AX,BX,SI,DI,NOREG];
            static immutable ubyte[3] seqmsw3 = [CX,DX,NOREG];
            *pseq = seqlsw3.ptr;
            *pseqmsw = seqmsw3.ptr;
        }
        else
        {
            static immutable ubyte[8] sequence4 = [AX,CX,DX,BX,SI,DI,BP,NOREG];
            *pseq = sequence4.ptr;
        }
    }
    else
    {   assert(I16);
        if (typtr(ty))
        {
            // For pointer types, try to pick index register first
            static immutable ubyte[8] seqidx5 = [BX,SI,DI,AX,CX,DX,BP,NOREG];
            *pseq = seqidx5.ptr;
        }
        else
        {
            // Otherwise, try to pick index registers last
            static immutable ubyte[8] sequence6 = [AX,CX,DX,BX,SI,DI,BP,NOREG];
            *pseq = sequence6.ptr;
        }
    }
}
/*******************************************
 * Call finally block.
 * Params:
 *      bf = block to call
 *      retregs = registers to preserve across call
 * Returns:
 *      code generated
 */
private code *callFinallyBlock(block *bf, regm_t retregs)
{
    CodeBuilder cdbs; cdbs.ctor();
    CodeBuilder cdbr; cdbr.ctor();
    int nalign = 0;

    calledFinally = true;
    uint npush = gensaverestore(retregs,cdbs,cdbr);

    if (STACKALIGN >= 16)
    {   npush += REGSIZE;
        if (npush & (STACKALIGN - 1))
        {   nalign = STACKALIGN - (npush & (STACKALIGN - 1));
            cod3_stackadj(cdbs, nalign);
        }
    }
    cdbs.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf);
    regcon.immed.mval = 0;
    if (nalign)
        cod3_stackadj(cdbs, -nalign);
    cdbs.append(cdbr);
    return cdbs.finish();
}

/*******************************
 * Generate block exit code
 */
void outblkexitcode(ref CodeBuilder cdb, block *bl, ref int anyspill, const(char)* sflsave, Symbol** retsym, const regm_t mfuncregsave)
{
    CodeBuilder cdb2; cdb2.ctor();
    elem *e = bl.Belem;
    block *nextb;
    regm_t retregs = 0;

    if (bl.BC != BCasm)
        assert(bl.Bcode == null);

    switch (bl.BC)                     /* block exit condition         */
    {
        case BCiftrue:
        {
            bool jcond = true;
            block *bs1 = bl.nthSucc(0);
            block *bs2 = bl.nthSucc(1);
            if (bs1 == bl.Bnext)
            {   // Swap bs1 and bs2
                block *btmp;

                jcond ^= 1;
                btmp = bs1;
                bs1 = bs2;
                bs2 = btmp;
            }
            logexp(cdb,e,jcond,FLblock,cast(code *) bs1);
            nextb = bs2;
        }
        L5:
            if (configv.addlinenumbers && bl.Bsrcpos.Slinnum &&
                !(funcsym_p.ty() & mTYnaked))
            {
                //printf("BCiftrue: %s(%u)\n", bl.Bsrcpos.Sfilename ? bl.Bsrcpos.Sfilename : "", bl.Bsrcpos.Slinnum);
                cdb.genlinnum(bl.Bsrcpos);
            }
            if (nextb != bl.Bnext)
            {
                assert(!(bl.Bflags & BFLepilog));
                genjmp(cdb,JMP,FLblock,nextb);
            }
            break;

        case BCjmptab:
        case BCifthen:
        case BCswitch:
        {
            assert(!(bl.Bflags & BFLepilog));
            doswitch(cdb,bl);               // hide messy details
            break;
        }
        version (MARS)
        {
        case BCjcatch:          // D catch clause of try-catch
            assert(ehmethod(funcsym_p) != EHmethod.EH_NONE);
            // Mark all registers as destroyed. This will prevent
            // register assignments to variables used in catch blocks.
            getregs(cdb,lpadregs());

            if (config.ehmethod == EHmethod.EH_DWARF)
            {
                /* Each block must have ESP set to the same value it was at the end
                 * of the prolog. But the unwinder calls catch blocks with ESP set
                 * at the value it was when the throwing function was called, which
                 * may have arguments pushed on the stack.
                 * This instruction will reset ESP to the correct offset from EBP.
                 */
                cdb.gen1(ESCAPE | ESCfixesp);
            }
            goto case_goto;
        }
        version (SCPP)
        {
        case BCcatch:           // C++ catch clause of try-catch
            // Mark all registers as destroyed. This will prevent
            // register assignments to variables used in catch blocks.
            getregs(cdb,allregs | mES);
            goto case_goto;

        case BCtry:
            usednteh |= EHtry;
            if (config.exe == EX_WIN32)
                usednteh |= NTEHtry;
            goto case_goto;
        }
        case BCgoto:
            nextb = bl.nthSucc(0);
            if ((MARS ||
                 funcsym_p.Sfunc.Fflags3 & Fnteh) &&
                ehmethod(funcsym_p) != EHmethod.EH_DWARF &&
                bl.Btry != nextb.Btry &&
                nextb.BC != BC_finally)
            {
                regm_t retregsx = 0;
                gencodelem(cdb,e,&retregsx,true);
                int toindex = nextb.Btry ? nextb.Btry.Bscope_index : -1;
                assert(bl.Btry);
                int fromindex = bl.Btry.Bscope_index;
                version (MARS)
                {
                    if (toindex + 1 == fromindex)
                    {   // Simply call __finally
                        if (bl.Btry &&
                            bl.Btry.nthSucc(1).BC == BCjcatch)
                        {
                            goto L5;        // it's a try-catch, not a try-finally
                        }
                    }
                }
                if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) ||
                    config.ehmethod == EHmethod.EH_SEH)
                {
                    nteh_unwind(cdb,0,toindex);
                }
                else
                {
                    version (MARS)
                    {
                        if (toindex + 1 <= fromindex)
                        {
                            //c = cat(c, linux_unwind(0, toindex));
                            block *bt;

                            //printf("B%d: fromindex = %d, toindex = %d\n", bl.Bdfoidx, fromindex, toindex);
                            bt = bl;
                            while ((bt = bt.Btry) != null && bt.Bscope_index != toindex)
                            {   block *bf;

                                //printf("\tbt.Bscope_index = %d, bt.Blast_index = %d\n", bt.Bscope_index, bt.Blast_index);
                                bf = bt.nthSucc(1);
                                // Only look at try-finally blocks
                                if (bf.BC == BCjcatch)
                                    continue;

                                if (bf == nextb)
                                    continue;
                                //printf("\tbf = B%d, nextb = B%d\n", bf.Bdfoidx, nextb.Bdfoidx);
                                if (nextb.BC == BCgoto &&
                                    !nextb.Belem &&
                                    bf == nextb.nthSucc(0))
                                    continue;

                                // call __finally
                                cdb.append(callFinallyBlock(bf.nthSucc(0), retregsx));
                            }
                        }
                    }
                }
                goto L5;
            }
        case_goto:
        {
            regm_t retregsx = 0;
            gencodelem(cdb,e,&retregsx,true);
            if (anyspill)
            {   // Add in the epilog code
                CodeBuilder cdbstore; cdbstore.ctor();
                CodeBuilder cdbload; cdbload.ctor();

                for (int i = 0; i < anyspill; i++)
                {   Symbol *s = globsym[i];

                    if (s.Sflags & SFLspill &&
                        vec_testbit(dfoidx,s.Srange))
                    {
                        s.Sfl = sflsave[i];    // undo block register assignments
                        cgreg_spillreg_epilog(bl,s,cdbstore,cdbload);
                    }
                }
                cdb.append(cdbstore);
                cdb.append(cdbload);
            }
            nextb = bl.nthSucc(0);
            goto L5;
        }

        case BC_try:
            if (config.ehmethod == EHmethod.EH_NONE || funcsym_p.Sfunc.Fflags3 & Feh_none)
            {
                /* Need to use frame pointer to access locals, not the stack pointer,
                 * because we'll be calling the BC_finally blocks and the stack will be off.
                 */
                needframe = 1;
            }
            else if (config.ehmethod == EHmethod.EH_SEH || config.ehmethod == EHmethod.EH_WIN32)
            {
                usednteh |= NTEH_try;
                nteh_usevars();
            }
            else
                usednteh |= EHtry;
            goto case_goto;

        case BC_finally:
            if (ehmethod(funcsym_p) == EHmethod.EH_DWARF)
            {
                // Mark scratch registers as destroyed.
                getregsNoSave(lpadregs());

                regm_t retregsx = 0;
                gencodelem(cdb,bl.Belem,&retregsx,true);

                // JMP bl.nthSucc(1)
                nextb = bl.nthSucc(1);

                goto L5;
            }
            else
            {
                if (config.ehmethod == EHmethod.EH_SEH ||
                    config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none))
                {
                    // Mark all registers as destroyed. This will prevent
                    // register assignments to variables used in finally blocks.
                    getregsNoSave(lpadregs());
                }

                assert(!e);
                // Generate CALL to finalizer code
                cdb.append(callFinallyBlock(bl.nthSucc(0), 0));

                // JMP bl.nthSucc(1)
                nextb = bl.nthSucc(1);

                goto L5;
            }
        case BC_lpad:
        {
            assert(ehmethod(funcsym_p) == EHmethod.EH_DWARF);
            // Mark all registers as destroyed. This will prevent
            // register assignments to variables used in finally blocks.
            getregsNoSave(lpadregs());

            regm_t retregsx = 0;
            gencodelem(cdb,bl.Belem,&retregsx,true);

            // JMP bl.nthSucc(0)
            nextb = bl.nthSucc(0);
            goto L5;
        }

        case BC_ret:
        {
            regm_t retregsx = 0;
            gencodelem(cdb,e,&retregsx,true);
            if (ehmethod(funcsym_p) == EHmethod.EH_DWARF)
            {
            }
            else
                cdb.gen1(0xC3);   // RET
            break;
        }

        static if (NTEXCEPTIONS)
        {
        case BC_except:
        {
            assert(!e);
            usednteh |= NTEH_except;
            nteh_setsp(cdb,0x8B);
            getregsNoSave(allregs);
            nextb = bl.nthSucc(0);
            goto L5;
        }
        case BC_filter:
        {
            nteh_filter(cdb, bl);
            // Mark all registers as destroyed. This will prevent
            // register assignments to variables used in filter blocks.
            getregsNoSave(allregs);
            regm_t retregsx = regmask(e.Ety, TYnfunc);
            gencodelem(cdb,e,&retregsx,true);
            cdb.gen1(0xC3);   // RET
            break;
        }
        }

        case BCretexp:
            reg_t reg1, reg2, lreg, mreg;
            retregs = allocretregs(e.Ety, e.ET, funcsym_p.ty(), reg1, reg2);

            lreg = mreg = NOREG;
            if (reg1 == NOREG)
            {}
            else if (tybasic(e.Ety) == TYcfloat)
                lreg = ST01;
            else if (mask(reg1) & (mST0 | mST01))
                lreg = reg1;
            else if (reg2 == NOREG)
                lreg = reg1;
            else if (mask(reg1) & XMMREGS)
            {
                lreg = XMM0;
                mreg = XMM1;
            }
            else
            {
                lreg = mask(reg1) & mLSW ? reg1 : AX;
                mreg = mask(reg2) & mMSW ? reg2 : DX;
            }
            if (reg1 != NOREG)
                retregs = (mask(lreg) | mask(mreg)) & ~mask(NOREG);

            // For the final load into the return regs, don't set regcon.used,
            // so that the optimizer can potentially use retregs for register
            // variable assignments.
            if (config.flags4 & CFG4optimized)
            {   regm_t usedsave;

                docommas(cdb,&e);
                usedsave = regcon.used;
                if (!OTleaf(e.Eoper))
                    gencodelem(cdb,e,&retregs,true);
                else
                {
                    if (e.Eoper == OPconst)
                        regcon.mvar = 0;
                    gencodelem(cdb,e,&retregs,true);
                    regcon.used = usedsave;
                    if (e.Eoper == OPvar)
                    {   Symbol *s = e.EV.Vsym;

                        if (s.Sfl == FLreg && s.Sregm != mAX)
                            *retsym = s;
                    }
                }
            }
            else
            {
                gencodelem(cdb,e,&retregs,true);
            }

            if (reg1 == NOREG)
            {
            }
            else if ((mask(reg1) | mask(reg2)) & (mST0 | mST01))
            {
                assert(reg1 == lreg && reg2 == NOREG);
                regm_t pretregs = mask(reg1) | mask(reg2);
                fixresult87(cdb, e, retregs, &pretregs, true);
            }
            // fix return registers
            else if (tybasic(e.Ety) == TYcfloat)
            {
                assert(lreg == ST01);
                if (I64)
                {
                    assert(reg2 == NOREG);
                    // spill
                    pop87();
                    pop87();
                    cdb.genfltreg(0xD9, 3, tysize(TYfloat));
                    genfwait(cdb);
                    cdb.genfltreg(0xD9, 3, 0);
                    genfwait(cdb);
                    // reload
                    if (config.exe == EX_WIN64)
                    {
                        assert(reg1 == AX);
                        cdb.genfltreg(LOD, reg1, 0);
                        code_orrex(cdb.last(), REX_W);
                    }
                    else
                    {
                        assert(reg1 == XMM0);
                        cdb.genxmmreg(xmmload(TYdouble), reg1, 0, TYdouble);
                    }
                }
                else
                {
                    assert(reg1 == AX && reg2 == DX);
                    regm_t pretregs = mask(reg1) | mask(reg2);
                    fixresult_complex87(cdb, e, retregs, &pretregs, true);
                }
            }
            else if (reg2 == NOREG)
                assert(lreg == reg1);
            else for (int v = 0; v < 2; v++)
            {
                if (v ^ (reg1 != mreg))
                    genmovreg(cdb, reg1, lreg);
                else
                    genmovreg(cdb, reg2, mreg);
            }
            if (reg1 != NOREG)
                retregs = (mask(reg1) | mask(reg2)) & ~mask(NOREG);
            goto L4;

        case BCret:
            retregs = 0;
            gencodelem(cdb,e,&retregs,true);
        L4:
            if (retregs == mST0)
            {   assert(global87.stackused == 1);
                pop87();                // account for return value
            }
            else if (retregs == mST01)
            {   assert(global87.stackused == 2);
                pop87();
                pop87();                // account for return value
            }

            if (MARS || usednteh & NTEH_try)
            {
                block *bt = bl;
                while ((bt = bt.Btry) != null)
                {
                    block *bf = bt.nthSucc(1);
                    version (MARS)
                    {
                        // Only look at try-finally blocks
                        if (bf.BC == BCjcatch)
                        {
                            continue;
                        }
                    }
                    if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) ||
                        config.ehmethod == EHmethod.EH_SEH)
                    {
                        if (bt.Bscope_index == 0)
                        {
                            // call __finally
                            CodeBuilder cdbs; cdbs.ctor();
                            CodeBuilder cdbr; cdbr.ctor();

                            nteh_gensindex(cdb,-1);
                            gensaverestore(retregs,cdbs,cdbr);
                            cdb.append(cdbs);
                            cdb.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf.nthSucc(0));
                            regcon.immed.mval = 0;
                            cdb.append(cdbr);
                        }
                        else
                        {
                            nteh_unwind(cdb,retregs,~0);
                        }
                        break;
                    }
                    else
                    {
                        // call __finally
                        cdb.append(callFinallyBlock(bf.nthSucc(0), retregs));
                    }
                }
            }
            break;

        case BCexit:
            retregs = 0;
            gencodelem(cdb,e,&retregs,true);
            if (config.flags4 & CFG4optimized)
                mfuncreg = mfuncregsave;
            break;
        case BCasm:
        {
            assert(!e);
            // Mark destroyed registers
            CodeBuilder cdbx; cdbx.ctor();
            getregs(cdbx,iasm_regs(bl));        // mark destroyed registers
            code *c = cdbx.finish();
            if (bl.Bsucc)
            {   nextb = bl.nthSucc(0);
                if (!bl.Bnext)
                {
                    cdb.append(bl.Bcode);
                    cdb.append(c);
                    goto L5;
                }
                if (nextb != bl.Bnext &&
                    bl.Bnext &&
                    !(bl.Bnext.BC == BCgoto &&
                     !bl.Bnext.Belem &&
                     nextb == bl.Bnext.nthSucc(0)))
                {
                    // See if already have JMP at end of block
                    code *cl = code_last(bl.Bcode);
                    if (!cl || cl.Iop != JMP)
                    {
                        cdb.append(bl.Bcode);
                        cdb.append(c);
                        goto L5;        // add JMP at end of block
                    }
                }
            }
            cdb.append(bl.Bcode);
            break;
        }

        default:
            debug
            printf("bl.BC = %d\n",bl.BC);
            assert(0);
    }
}

/***************************
 * Allocate registers for function return values.
 *
 * Params:
 *    ty   = return type
 *    t    = return type extended info
 *    tyf  = function type
 *    reg1 = set to the first part register, else NOREG
 *    reg2 = set to the second part register, else NOREG
 *
 * Returns:
 *    a bit mask of return registers.
 *    0 if function returns on the stack or returns void.
 */
regm_t allocretregs(const tym_t ty, type* t, const tym_t tyf, out reg_t reg1, out reg_t reg2)
{
    //printf("allocretregs()\n");
    reg1 = reg2 = NOREG;

    if (!(config.exe & EX_posix))
        return regmask(ty, tyf);    // for non-Posix ABI

    /* The rest is for the Itanium ABI
     */

    const tyb = tybasic(ty);
    if (tyb == TYvoid)
        return 0;

    tym_t ty1 = tyb;
    tym_t ty2 = TYMAX;  // stays TYMAX if only one register is needed

    if (ty & mTYxmmgpr)
    {
        ty1 = TYdouble;
        ty2 = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        ty1 = TYllong;
        ty2 = TYdouble;
    }

    if (tyb == TYstruct)
    {
        assert(t);
        ty1 = t.Tty;
    }

    const tyfb = tybasic(tyf);
    switch (tyrelax(ty1))
    {
        case TYcent:
            if (I32)
                return 0;
            ty1 = ty2 = TYllong;
            break;

        case TYcdouble:
            if (tyfb == TYjfunc && I32)
                break;
            if (I32)
                return 0;
            ty1 = ty2 = TYdouble;
            break;

        case TYcfloat:
            if (tyfb == TYjfunc && I32)
                break;
            if (I32)
                goto case TYllong;
            ty1 = TYdouble;
            break;

        case TYcldouble:
            if (tyfb == TYjfunc && I32)
                break;
            if (I32)
                return 0;
            break;

        case TYllong:
            if (I32)
                ty1 = ty2 = TYlong;
            break;

        case TYarray:
            type* targ1, targ2;
            argtypes(t, targ1, targ2);
            if (targ1)
                ty1 = targ1.Tty;
            else
                return 0;
            if (targ2)
                ty2 = targ2.Tty;
            break;

        case TYstruct:
            assert(t);
            if (I64)
            {
                assert(tybasic(t.Tty) == TYstruct);
                if (const targ1 = t.Ttag.Sstruct.Sarg1type)
                    ty1 = targ1.Tty;
                else
                    return 0;
                if (const targ2 = t.Ttag.Sstruct.Sarg2type)
                    ty2 = targ2.Tty;
                break;
            }
            return 0;

        default:
            break;
    }

    /* now we have ty1 and ty2, use that to determine which register
     * is used for ty1 and which for ty2
     */

    static struct RetRegsAllocator
    {
    nothrow:
        static immutable reg_t[2] gpr_regs = [AX, DX];
        static immutable reg_t[2] xmm_regs = [XMM0, XMM1];

        uint cntgpr = 0,
             cntxmm = 0;

        reg_t gpr() { return gpr_regs[cntgpr++]; }
        reg_t xmm() { return xmm_regs[cntxmm++]; }
    }

    RetRegsAllocator rralloc;
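    /* Nested helper: map one part type to its return register, drawing GPRs
     * (AX, DX) and XMM registers (XMM0, XMM1) from rralloc in order, and
     * falling back to the x87 stack (ST0/ST01) for ldouble/complex types.
     */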
    reg_t allocreg(tym_t tym)
    {
        if (tym == TYMAX)
            return NOREG;
        switch (tysize(tym))
        {
            case 1:
            case 2:
            case 4:
                if (tyfloating(tym))
                    return I64 ? rralloc.xmm() : ST0;
                else
                    return rralloc.gpr();

            case 8:
                if (tycomplex(tym))
                {
                    assert(tyfb == TYjfunc && I32);
                    return ST01;
                }
                assert(I64 || tyfloating(tym));
                goto case 4;

            default:
                if (tybasic(tym) == TYldouble || tybasic(tym) == TYildouble)
                {
                    return ST0;
                }
                else if (tybasic(tym) == TYcldouble)
                {
                    return ST01;
                }
                else if (tycomplex(tym) && tyfb == TYjfunc && I32)
                {
                    return ST01;
                }
                else if (tysimd(tym))
                {
                    return rralloc.xmm();
                }

                debug WRTYxx(tym);
                assert(0);
        }
    }

    reg1 = allocreg(ty1);
    reg2 = allocreg(ty2);

    return (mask(reg1) | mask(reg2)) & ~mask(NOREG);
}

/***********************************************
 * Struct necessary for sorting switch cases.
 */

alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

extern (C)  // qsort cmp functions need to be "C"
{
struct CaseVal
{
    targ_ullong val;
    block *target;

    /* Sort function for qsort() */
    extern (C) static nothrow int cmp(scope const(void*) p, scope const(void*) q)
    {
        const(CaseVal)* c1 = cast(const(CaseVal)*)p;
        const(CaseVal)* c2 = cast(const(CaseVal)*)q;
        return (c1.val < c2.val) ? -1 : ((c1.val == c2.val) ? 0 : 1);
    }
}
}

/***
 * Generate comparison of [reg2,reg] with val
 */
private void cmpval(ref CodeBuilder cdb, targ_llong val, uint sz, reg_t reg, reg_t reg2, reg_t sreg)
{
    if (I64 && sz == 8)
    {
        assert(reg2 == NOREG);
        if (val == cast(int)val)    // if val is a 64 bit value sign-extended from 32 bits
        {
            cdb.genc2(0x81,modregrmx(3,7,reg),cast(targ_size_t)val);     // CMP reg,value32
            cdb.last().Irex |= REX_W;                                    // 64 bit operand
        }
        else
        {
            assert(sreg != NOREG);
            movregconst(cdb,sreg,cast(targ_size_t)val,64);  // MOV sreg,val64
            genregs(cdb,0x3B,reg,sreg);                     // CMP reg,sreg
            code_orrex(cdb.last(), REX_W);
            getregsNoSave(mask(sreg));                      // don't remember we loaded this constant
        }
    }
    else if (reg2 == NOREG)
        cdb.genc2(0x81,modregrmx(3,7,reg),cast(targ_size_t)val);         // CMP reg,casevalue
    else
    {
        cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val));  // CMP reg2,MSREG(casevalue)
        code *cnext = gennop(null);
        genjmp(cdb,JNE,FLcode,cast(block *) cnext);                      // JNE cnext
        cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)val);          // CMP reg,casevalue
        cdb.append(cnext);
    }
}
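/* ifthen() generates a binary search over the sorted casevals[0 .. ncases]:
 * it compares against the pivot case, emits JE to that case's target, JA into
 * the upper half, and otherwise falls through into the lower half; short runs
 * (or builds not optimized for speed) degenerate to a linear CMP/JE sequence.
 */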
private void ifthen(ref CodeBuilder cdb, CaseVal *casevals, size_t ncases,
        uint sz, reg_t reg, reg_t reg2, reg_t sreg, block *bdefault, bool last)
{
    if (ncases >= 4 && config.flags4 & CFG4speed)
    {
        size_t pivot = ncases >> 1;

        // Compares for casevals[0..pivot]
        CodeBuilder cdb1; cdb1.ctor();
        ifthen(cdb1, casevals, pivot, sz, reg, reg2, sreg, bdefault, true);

        // Compares for casevals[pivot+1..ncases]
        CodeBuilder cdb2; cdb2.ctor();
        ifthen(cdb2, casevals + pivot + 1, ncases - pivot - 1, sz, reg, reg2, sreg, bdefault, last);
        code *c2 = gennop(null);

        // Compare for caseval[pivot]
        cmpval(cdb, casevals[pivot].val, sz, reg, reg2, sreg);
        genjmp(cdb,JE,FLblock,casevals[pivot].target); // JE target
        // Note uint jump here, as cases were sorted using uint comparisons
        genjmp(cdb,JA,FLcode,cast(block *) c2);        // JA c2

        cdb.append(cdb1);
        cdb.append(c2);
        cdb.append(cdb2);
    }
    else
    {   // Not worth doing a binary search, just do a sequence of CMP/JE
        for (size_t n = 0; n < ncases; n++)
        {
            targ_llong val = casevals[n].val;
            cmpval(cdb, val, sz, reg, reg2, sreg);
            code *cnext = null;
            if (reg2 != NOREG)
            {
                cnext = gennop(null);
                genjmp(cdb,JNE,FLcode,cast(block *) cnext);                      // JNE cnext
                cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val));  // CMP reg2,MSREG(casevalue)
            }
            genjmp(cdb,JE,FLblock,casevals[n].target);   // JE caseaddr
            cdb.append(cnext);
        }

        if (last)       // if default is not next block
            genjmp(cdb,JMP,FLblock,bdefault);
    }
}

/*******************************
 * Generate code for blocks ending in a switch statement.
 * Take BCswitch and decide on
 *      BCifthen        use if - then code
 *      BCjmptab        index into jump table
 *      BCswitch        search table for match
 */

void doswitch(ref CodeBuilder cdb, block *b)
{
    targ_ulong msw;

    // If switch tables are in code segment and we need a CS: override to get at them
    bool csseg = cast(bool)(config.flags & CFGromable);

    //printf("doswitch(%d)\n", b.BC);
    elem *e = b.Belem;
    elem_debug(e);
    docommas(cdb,&e);
    cgstate.stackclean++;
    tym_t tys = tybasic(e.Ety);
    int sz = _tysize[tys];
    bool dword = (sz == 2 * REGSIZE);
    bool mswsame = true;                // assume all msw's are the same
    targ_llong *p = b.Bswitch;          // pointer to case data
    assert(p);
    uint ncases = cast(uint)*p++;       // number of cases

    targ_llong vmax = MINLL;            // smallest possible llong
    targ_llong vmin = MAXLL;            // largest possible llong
    for (uint n = 0; n < ncases; n++)   // find max and min case values
    {
        targ_llong val = *p++;
        if (val > vmax) vmax = val;
        if (val < vmin) vmin = val;
        if (REGSIZE == 2)
        {
            ushort ms = (val >> 16) & 0xFFFF;
            if (n == 0)
                msw = ms;
            else if (msw != ms)
                mswsame = 0;
        }
        else // REGSIZE == 4
        {
            targ_ulong ms = (val >> 32) & 0xFFFFFFFF;
            if (n == 0)
                msw = ms;
            else if (msw != ms)
                mswsame = 0;
        }
    }
    p -= ncases;
    //dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin);

    /* Three kinds of switch strategies - pick one
     */
    if (ncases <= 3)
        goto Lifthen;
    else if (I16 && cast(targ_ullong)(vmax - vmin) <= ncases * 2)
        goto Ljmptab;           // >=50% of the table is case values, rest is default
    else if (cast(targ_ullong)(vmax - vmin) <= ncases * 3)
        goto Ljmptab;           // >= 33% of the table is case values, rest is default
    else if (I16)
        goto Lswitch;
    else
        goto Lifthen;

    /*************************************************************************/
    {   // generate if-then sequence
    Lifthen:
        regm_t retregs = ALLREGS;
        b.BC = BCifthen;
        scodelem(cdb,e,&retregs,0,true);
        reg_t reg, reg2;
        if (dword)
        {   reg = findreglsw(retregs);
            reg2 = findregmsw(retregs);
        }
        else
        {
            reg = findreg(retregs);     // reg that result is in
            reg2 = NOREG;
        }
        list_t bl = b.Bsucc;
        block *bdefault = b.nthSucc(0);
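        // If all case values share the same most significant word, compare it
        // once up front against MSW and then drop reg2 from the per-case compares.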
        if (dword && mswsame)
        {
            cdb.genc2(0x81,modregrm(3,7,reg2),msw);   // CMP reg2,MSW
            genjmp(cdb,JNE,FLblock,bdefault);         // JNE default
            reg2 = NOREG;
        }

        reg_t sreg = NOREG;                          // may need a scratch register

        // Put into casevals[0..ncases] so we can sort then slice
        CaseVal *casevals = cast(CaseVal *)malloc(ncases * CaseVal.sizeof);
        assert(casevals);
        for (uint n = 0; n < ncases; n++)
        {
            casevals[n].val = p[n];
            bl = list_next(bl);
            casevals[n].target = list_block(bl);

            // See if we need a scratch register
            if (sreg == NOREG && I64 && sz == 8 && p[n] != cast(int)p[n])
            {   regm_t regm = ALLREGS & ~mask(reg);
                allocreg(cdb,&regm, &sreg, TYint);
            }
        }

        // Sort cases so we can do a runtime binary search
        qsort(casevals, ncases, CaseVal.sizeof, &CaseVal.cmp);

        //for (uint n = 0; n < ncases; n++)
            //printf("casevals[%lld] = x%x\n", n, casevals[n].val);

        // Generate binary tree of comparisons
        ifthen(cdb, casevals, ncases, sz, reg, reg2, sreg, bdefault, bdefault != b.Bnext);

        free(casevals);

        cgstate.stackclean--;
        return;
    }

    /*************************************************************************/
    {
        // Use switch value to index into jump table
    Ljmptab:
        //printf("Ljmptab:\n");

        b.BC = BCjmptab;

        /* If vmin is small enough, we can just set it to 0 and the jump
         * table entries from 0..vmin-1 can be set with the default target.
         * This saves the SUB instruction.
         * Must be same computation as used in outjmptab().
         */
        if (vmin > 0 && vmin <= _tysize[TYint])
            vmin = 0;

        b.Btablesize = cast(int) (vmax - vmin + 1) * tysize(TYnptr);
        regm_t retregs = IDXREGS;
        if (dword)
            retregs |= mMSW;
        if (config.exe & EX_posix && I32 && config.flags3 & CFG3pic)
            retregs &= ~mBX;                            // need EBX for GOT
        bool modify = (I16 || I64 || vmin);
        scodelem(cdb,e,&retregs,0,!modify);
        reg_t reg = findreg(retregs & IDXREGS); // reg that result is in
        reg_t reg2;
        if (dword)
            reg2 = findregmsw(retregs);
        if (modify)
        {
            assert(!(retregs & regcon.mvar));
            getregs(cdb,retregs);
        }
        if (vmin)                       // if there is a minimum
        {
            cdb.genc2(0x81,modregrm(3,5,reg),cast(targ_size_t)vmin);         // SUB reg,vmin
            if (dword)
            {   cdb.genc2(0x81,modregrm(3,3,reg2),cast(targ_size_t)MSREG(vmin)); // SBB reg2,vmin
                genjmp(cdb,JNE,FLblock,b.nthSucc(0));   // JNE default
            }
        }
        else if (dword)
        {   gentstreg(cdb,reg2);                        // TEST reg2,reg2
            genjmp(cdb,JNE,FLblock,b.nthSucc(0));       // JNE default
        }
        if (vmax - vmin != REGMASK)     // if there is a maximum
        {   // CMP reg,vmax-vmin
            cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)(vmax-vmin));
            if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
            genjmp(cdb,JA,FLblock,b.nthSucc(0));        // JA default
        }
        if (I64)
        {
            if (!vmin)
            {   // Need to clear out high 32 bits of reg
                // Use 8B instead of 89, as 89 will be optimized away as a NOP
                genregs(cdb,0x8B,reg,reg);              // MOV reg,reg
            }
            if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
            {
                /* LEA    R1,disp[RIP]          48 8D 05 00 00 00 00
                 * MOVSXD R2,[reg*4][R1]        48 63 14 B8
                 * LEA    R1,[R1][R2]           48 8D 04 02
                 * JMP    R1                    FF E0
                 */
                reg_t r1;
                regm_t scratchm = ALLREGS & ~mask(reg);
                allocreg(cdb,&scratchm,&r1,TYint);
                reg_t r2;
                scratchm = ALLREGS & ~(mask(reg) | mask(r1));
                allocreg(cdb,&scratchm,&r2,TYint);

                CodeBuilder cdbe; cdbe.ctor();
                cdbe.genc1(LEA,(REX_W << 16) | modregxrm(0,r1,5),FLswitch,0);               // LEA R1,disp[RIP]
                cdbe.last().IEV1.Vswitch = b;
                cdbe.gen2sib(0x63,(REX_W << 16) | modregxrm(0,r2,4), modregxrmx(2,reg,r1)); // MOVSXD R2,[reg*4][R1]
                cdbe.gen2sib(LEA,(REX_W << 16) | modregxrm(0,r1,4),modregxrmx(0,r1,r2));    // LEA R1,[R1][R2]
                cdbe.gen2(0xFF,modregrmx(3,4,r1));                                          // JMP R1

                b.Btablesize = cast(int) (vmax - vmin + 1) * 4;
                code *ce = cdbe.finish();
                pinholeopt(ce, null);

                cdb.append(cdbe);
            }
            else
            {
                cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);   // JMP disp[reg*8]
                cdb.last().IEV1.Vswitch = b;
                cdb.last().Isib = modregrm(3,reg & 7,5);
                if (reg & 8)
                    cdb.last().Irex |= REX_X;
            }
        }
        else if (I32)
        {
            static if (JMPJMPTABLE)
            {
                /* LEA jreg,offset ctable[reg][reg * 4]
                   JMP jreg
                  ctable:
                   JMP case0
                   JMP case1
                   ...
                 */
                CodeBuilder ctable; ctable.ctor();
                block *bdef = b.nthSucc(0);
                targ_llong u;
                for (u = vmin; ; u++)
                {   block *targ = bdef;
                    for (n = 0; n < ncases; n++)
                    {
                        if (p[n] == u)
                        {   targ = b.nthSucc(n + 1);
                            break;
                        }
                    }
                    genjmp(ctable,JMP,FLblock,targ);
                    ctable.last().Iflags |= CFjmp5;           // don't shrink these
                    if (u == vmax)
                        break;
                }

                // Allocate scratch register jreg
                regm_t scratchm = ALLREGS & ~mask(reg);
                uint jreg = AX;
                allocreg(cdb,&scratchm,&jreg,TYint);

                // LEA jreg, offset ctable[reg][reg*4]
                cdb.genc1(LEA,modregrm(2,jreg,4),FLcode,6);
                cdb.last().Isib = modregrm(2,reg,reg);
                cdb.gen2(0xFF,modregrm(3,4,jreg));      // JMP jreg
                cdb.append(ctable);
                b.Btablesize = 0;
                cgstate.stackclean--;
                return;
            }
            else
            {
                if (config.exe & (EX_OSX | EX_OSX64))
                {
                    /*     CALL L1
                     * L1: POP  R1
                     *     ADD  R1,disp[reg*4][R1]
                     *     JMP  R1
                     */
                    // Allocate scratch register r1
                    regm_t scratchm = ALLREGS & ~mask(reg);
                    reg_t r1;
                    allocreg(cdb,&scratchm,&r1,TYint);

                    cdb.genc2(CALL,0,0);                           //     CALL L1
                    cdb.gen1(0x58 + r1);                           // L1: POP R1
                    cdb.genc1(0x03,modregrm(2,r1,4),FLswitch,0);   // ADD R1,disp[reg*4][EBX]
                    cdb.last().IEV1.Vswitch = b;
                    cdb.last().Isib = modregrm(2,reg,r1);
                    cdb.gen2(0xFF,modregrm(3,4,r1));               // JMP R1
                }
                else
                {
                    if (config.flags3 & CFG3pic)
                    {
                        /* MOV  R1,EBX
                         * SUB  R1,funcsym_p@GOTOFF[offset][reg*4][EBX]
                         * JMP  R1
                         */

                        // Load GOT in EBX
                        load_localgot(cdb);

                        // Allocate scratch register r1
                        regm_t scratchm = ALLREGS & ~(mask(reg) | mBX);
                        reg_t r1;
                        allocreg(cdb,&scratchm,&r1,TYint);

                        genmovreg(cdb,r1,BX);                          // MOV R1,EBX
                        cdb.genc1(0x2B,modregxrm(2,r1,4),FLswitch,0);  // SUB R1,disp[reg*4][EBX]
                        cdb.last().IEV1.Vswitch = b;
                        cdb.last().Isib = modregrm(2,reg,BX);
                        cdb.gen2(0xFF,modregrmx(3,4,r1));              // JMP R1
                    }
                    else
                    {
                        cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);    // JMP disp[idxreg*4]
                        cdb.last().IEV1.Vswitch = b;
                        cdb.last().Isib = modregrm(2,reg,5);
                    }
                }
            }
        }
        else if (I16)
        {
            cdb.gen2(0xD1,modregrm(3,4,reg));                   // SHL reg,1
            uint rm = getaddrmode(retregs) | modregrm(0,4,0);
            cdb.genc1(0xFF,rm,FLswitch,0);                      // JMP [CS:]disp[idxreg]
            cdb.last().IEV1.Vswitch = b;
            cdb.last().Iflags |= csseg ? CFcs : 0;              // segment override
        }
        else
            assert(0);
        cgstate.stackclean--;
        return;
    }

    /*************************************************************************/
    {
        /* Scan a table of case values, and jump to corresponding address.
         * Since it relies on REPNE SCASW, it has really nothing to recommend it
         * over Lifthen for 32 and 64 bit code.
         * Note that it has not been tested with MACHOBJ (OSX).
         */
    Lswitch:
        regm_t retregs = mAX;                  // SCASW requires AX
        if (dword)
            retregs |= mDX;
        else if (ncases <= 6 || config.flags4 & CFG4speed)
            goto Lifthen;
        scodelem(cdb,e,&retregs,0,true);
        if (dword && mswsame)
        {   /* CMP DX,MSW       */
            cdb.genc2(0x81,modregrm(3,7,DX),msw);
            genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
        }
        getregs(cdb,mCX|mDI);

        if (config.flags3 & CFG3pic && config.exe & EX_posix)
        {   // Add in GOT
            getregs(cdb,mDX);
            cdb.genc2(CALL,0,0);        //     CALL L1
            cdb.gen1(0x58 + DI);        // L1: POP EDI

            //     ADD EDI,_GLOBAL_OFFSET_TABLE_+3
            Symbol *gotsym = Obj.getGOTsym();
            cdb.gencs(0x81,modregrm(3,0,DI),FLextern,gotsym);
            cdb.last().Iflags = CFoff;
            cdb.last().IEV2.Voffset = 3;

            makeitextern(gotsym);

            genmovreg(cdb, DX, DI);              // MOV EDX, EDI
            // ADD EDI,offset of switch table
            cdb.gencs(0x81,modregrm(3,0,DI),FLswitch,null);
            cdb.last().IEV2.Vswitch = b;
        }

        if (!(config.flags3 & CFG3pic))
        {
            // MOV DI,offset of switch table
            cdb.gencs(0xC7,modregrm(3,0,DI),FLswitch,null);
            cdb.last().IEV2.Vswitch = b;
        }
        movregconst(cdb,CX,ncases,0);    // MOV CX,ncases

        /* The switch table will be accessed through ES:DI.
         * Therefore, load ES with proper segment value.
         */
        if (config.flags3 & CFG3eseqds)
        {
            assert(!csseg);
            getregs(cdb,mCX);           // allocate CX
        }
        else
        {
            getregs(cdb,mES|mCX);       // allocate ES and CX
            cdb.gen1(csseg ? 0x0E : 0x1E);      // PUSH CS/DS
            cdb.gen1(0x07);                     // POP  ES
        }

        targ_size_t disp = (ncases - 1) * _tysize[TYint];  // displacement to jump table
        if (dword && !mswsame)
        {

            /* Build the following:
                L1:     SCASW
                        JNE     L2
                        CMP     DX,[CS:]disp[DI]
                L2:     LOOPNE  L1
             */

            const int mod = (disp > 127) ? 2 : 1;                // displacement size
            code *cloop = genc2(null,0xE0,0,-7 - mod - csseg);   // LOOPNE scasw
            cdb.gen1(0xAF);                                      // SCASW
            code_orflag(cdb.last(),CFtarg2);                     // target of jump
            genjmp(cdb,JNE,FLcode,cast(block *) cloop);          // JNE loop
            // CMP DX,[CS:]disp[DI]
            cdb.genc1(0x39,modregrm(mod,DX,5),FLconst,disp);
            cdb.last().Iflags |= csseg ? CFcs : 0;               // possible seg override
            cdb.append(cloop);
            disp += ncases * _tysize[TYint];                     // skip over msw table
        }
        else
        {
            cdb.gen1(0xF2);              // REPNE
            cdb.gen1(0xAF);              // SCASW
        }
        genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default
        const int mod = (disp > 127) ? 2 : 1; // 1 or 2 byte displacement
        if (csseg)
            cdb.gen1(SEGCS);            // table is in code segment

        if (config.flags3 & CFG3pic &&
            config.exe & EX_posix)
        {   // ADD EDX,(ncases-1)*2[EDI]
            cdb.genc1(0x03,modregrm(mod,DX,7),FLconst,disp);
            // JMP EDX
            cdb.gen2(0xFF,modregrm(3,4,DX));
        }

        if (!(config.flags3 & CFG3pic))
        {   // JMP (ncases-1)*2[DI]
            cdb.genc1(0xFF,modregrm(mod,4,(I32 ? 7 : 5)),FLconst,disp);
            cdb.last().Iflags |= csseg ? CFcs : 0;
        }
        b.Btablesize = disp + _tysize[TYint] + ncases * tysize(TYnptr);
        //assert(b.Bcode);
        cgstate.stackclean--;
        return;
    }
}

/******************************
 * Output data block for a jump table (BCjmptab).
 * The 'holes' in the table get filled with the
 * default label.
 */

void outjmptab(block *b)
{
    if (JMPJMPTABLE && I32)
        return;

    targ_llong *p = b.Bswitch;           // pointer to case data
    size_t ncases = cast(size_t)*p++;    // number of cases

    /* Find vmin and vmax, the range of the table will be [vmin .. vmax + 1]
     * Must be same computation as used in doswitch().
     */
    targ_llong vmax = MINLL;             // smallest possible llong
    targ_llong vmin = MAXLL;             // largest possible llong
    for (size_t n = 0; n < ncases; n++)  // find min case value
    {   targ_llong val = p[n];
        if (val > vmax) vmax = val;
        if (val < vmin) vmin = val;
    }
    if (vmin > 0 && vmin <= _tysize[TYint])
        vmin = 0;
    assert(vmin <= vmax);

    /* Segment and offset into which the jump table will be emitted
     */
    int jmpseg = objmod.jmpTableSegment(funcsym_p);
    targ_size_t *poffset = &Offset(jmpseg);

    /* Align start of jump table
     */
    targ_size_t alignbytes = _align(0,*poffset) - *poffset;
    objmod.lidata(jmpseg,*poffset,alignbytes);
    assert(*poffset == b.Btableoffset);        // should match precomputed value

    Symbol *gotsym = null;
    targ_size_t def = b.nthSucc(0).Boffset;  // default address
    for (targ_llong u = vmin; ; u++)
    {   targ_size_t targ = def;                     // default
        for (size_t n = 0; n < ncases; n++)
        {   if (p[n] == u)
            {   targ = b.nthSucc(cast(int)(n + 1)).Boffset;
                break;
            }
        }
        if (config.exe & (EX_LINUX64 | EX_FREEBSD64 | EX_OPENBSD64 | EX_DRAGONFLYBSD64 | EX_SOLARIS64))
        {
            if (config.flags3 & CFG3pic)
            {
                objmod.reftodatseg(jmpseg,*poffset,cast(targ_size_t)(targ + (u - vmin) * 4),funcsym_p.Sseg,CFswitch);
                *poffset += 4;
            }
            else
            {
                objmod.reftodatseg(jmpseg,*poffset,targ,funcsym_p.Sxtrnnum,CFoffset64 | CFswitch);
                *poffset += 8;
            }
        }
        else if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS))
        {
            if (config.flags3 & CFG3pic)
            {
                assert(config.flags & CFGromable);
                // Want a GOTPC fixup to _GLOBAL_OFFSET_TABLE_
                if (!gotsym)
                    gotsym = Obj.getGOTsym();
                objmod.reftoident(jmpseg,*poffset,gotsym,*poffset - targ,CFswitch);
            }
            else
                objmod.reftocodeseg(jmpseg,*poffset,targ);
            *poffset += 4;
        }
        else if (config.exe & (EX_OSX | EX_OSX64))
        {
            targ_size_t val;
            if (I64)
                val = targ - b.Btableoffset;
            else
                val = targ - b.Btablebase;
            objmod.write_bytes(SegData[jmpseg],4,&val);
        }
        else
        {
            if (I64)
            {
                targ_size_t val = targ - b.Btableoffset;
                objmod.write_bytes(SegData[jmpseg],4,&val);
            }
            else
            {
                objmod.reftocodeseg(jmpseg,*poffset,targ);
                *poffset += tysize(TYnptr);
            }
        }

        if (u == vmax)                  // for case that (vmax == ~0)
            break;
    }
}
2178 */ 2179 2180 void outswitab(block *b) 2181 { 2182 //printf("outswitab()\n"); 2183 targ_llong *p = b.Bswitch; // pointer to case data 2184 uint ncases = cast(uint)*p++; // number of cases 2185 2186 const int seg = objmod.jmpTableSegment(funcsym_p); 2187 targ_size_t *poffset = &Offset(seg); 2188 targ_size_t offset = *poffset; 2189 targ_size_t alignbytes = _align(0,*poffset) - *poffset; 2190 objmod.lidata(seg,*poffset,alignbytes); // any alignment bytes necessary 2191 assert(*poffset == offset + alignbytes); 2192 2193 uint sz = _tysize[TYint]; 2194 assert(SegData[seg].SDseg == seg); 2195 for (uint n = 0; n < ncases; n++) // send out value table 2196 { 2197 //printf("\tcase %d, offset = x%x\n", n, *poffset); 2198 objmod.write_bytes(SegData[seg],sz,p); 2199 p++; 2200 } 2201 offset += alignbytes + sz * ncases; 2202 assert(*poffset == offset); 2203 2204 if (b.Btablesize == ncases * (REGSIZE * 2 + tysize(TYnptr))) 2205 { 2206 // Send out MSW table 2207 p -= ncases; 2208 for (uint n = 0; n < ncases; n++) 2209 { 2210 targ_size_t val = cast(targ_size_t)MSREG(*p); 2211 p++; 2212 objmod.write_bytes(SegData[seg],REGSIZE,&val); 2213 } 2214 offset += REGSIZE * ncases; 2215 assert(*poffset == offset); 2216 } 2217 2218 list_t bl = b.Bsucc; 2219 for (uint n = 0; n < ncases; n++) // send out address table 2220 { 2221 bl = list_next(bl); 2222 objmod.reftocodeseg(seg,*poffset,list_block(bl).Boffset); 2223 *poffset += tysize(TYnptr); 2224 } 2225 assert(*poffset == offset + ncases * tysize(TYnptr)); 2226 } 2227 2228 /***************************** 2229 * Return a jump opcode relevant to the elem for a JMP true. 2230 */ 2231 2232 int jmpopcode(elem *e) 2233 { 2234 tym_t tym; 2235 int zero,i,jp,op; 2236 static immutable ubyte[6][2][2] jops = 2237 [ /* <= > < >= == != <=0 >0 <0 >=0 ==0 !=0 */ 2238 [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JS ,JNS,JE ,JNE] ], /* signed */ 2239 [ [JBE,JA ,JB ,JAE,JE ,JNE],[JE ,JNE,JB ,JAE,JE ,JNE] ], /* uint */ 2240 /+ 2241 [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JL ,JGE,JE ,JNE] ], /* real */ 2242 [ [JBE,JA ,JB ,JAE,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087 */ 2243 [ [JA ,JBE,JAE,JB ,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087 R */ 2244 +/ 2245 ]; 2246 2247 enum 2248 { 2249 XP = (JP << 8), 2250 XNP = (JNP << 8), 2251 } 2252 static immutable uint[26][1] jfops = 2253 /* le gt lt ge eqeq ne unord lg leg ule ul uge */ 2254 [ 2255 [ XNP|JBE,JA,XNP|JB,JAE,XNP|JE, XP|JNE,JP, JNE,JNP, JBE,JC,XP|JAE, 2256 2257 /* ug ue ngt nge nlt nle ord nlg nleg nule nul nuge nug nue */ 2258 XP|JA,JE,JBE,JB, XP|JAE,XP|JA, JNP,JE, JP, JA, JNC,XNP|JB, XNP|JBE,JNE ], /* 8087 */ 2259 ]; 2260 2261 assert(e); 2262 while (e.Eoper == OPcomma || 2263 /* The OTleaf(e.EV.E1.Eoper) is to line up with the case in cdeq() where */ 2264 /* we decide if mPSW is passed on when evaluating E2 or not. 
*/ 2265 (e.Eoper == OPeq && OTleaf(e.EV.E1.Eoper))) 2266 { 2267 e = e.EV.E2; /* right operand determines it */ 2268 } 2269 2270 op = e.Eoper; 2271 tym_t tymx = tybasic(e.Ety); 2272 bool needsNanCheck = tyfloating(tymx) && config.inline8087 && 2273 (tymx == TYldouble || tymx == TYildouble || tymx == TYcldouble || 2274 tymx == TYcdouble || tymx == TYcfloat || 2275 (tyxmmreg(tymx) && config.fpxmmregs && e.Ecount != e.Ecomsub) || 2276 op == OPind || 2277 (OTcall(op) && (regmask(tymx, tybasic(e.EV.E1.Eoper)) & (mST0 | XMMREGS)))); 2278 if (e.Ecount != e.Ecomsub) // comsubs just get Z bit set 2279 { 2280 if (needsNanCheck) // except for floating point values that need a NaN check 2281 return XP|JNE; 2282 else 2283 return JNE; 2284 } 2285 if (!OTrel(op)) // not relational operator 2286 { 2287 if (needsNanCheck) 2288 return XP|JNE; 2289 2290 if (op == OPu32_64) { e = e.EV.E1; op = e.Eoper; } 2291 if (op == OPu16_32) { e = e.EV.E1; op = e.Eoper; } 2292 if (op == OPu8_16) op = e.EV.E1.Eoper; 2293 return ((op >= OPbt && op <= OPbts) || op == OPbtst) ? JC : JNE; 2294 } 2295 2296 if (e.EV.E2.Eoper == OPconst) 2297 zero = !boolres(e.EV.E2); 2298 else 2299 zero = 0; 2300 2301 tym = e.EV.E1.Ety; 2302 if (tyfloating(tym)) 2303 { 2304 static if (1) 2305 { 2306 i = 0; 2307 if (config.inline8087) 2308 { i = 1; 2309 2310 static if (1) 2311 { 2312 if (rel_exception(op) || config.flags4 & CFG4fastfloat) 2313 { 2314 const bool NOSAHF = (I64 || config.fpxmmregs); 2315 if (zero) 2316 { 2317 if (NOSAHF) 2318 op = swaprel(op); 2319 } 2320 else if (NOSAHF) 2321 op = swaprel(op); 2322 else if (cmporder87(e.EV.E2)) 2323 op = swaprel(op); 2324 else 2325 { } 2326 } 2327 else 2328 { 2329 if (zero && config.target_cpu < TARGET_80386) 2330 { } 2331 else 2332 op = swaprel(op); 2333 } 2334 } 2335 else 2336 { 2337 if (zero && !rel_exception(op) && config.target_cpu >= TARGET_80386) 2338 op = swaprel(op); 2339 else if (!zero && 2340 (cmporder87(e.EV.E2) || !(rel_exception(op) || config.flags4 & CFG4fastfloat))) 2341 /* compare is reversed */ 2342 op = swaprel(op); 2343 } 2344 } 2345 jp = jfops[0][op - OPle]; 2346 goto L1; 2347 } 2348 else 2349 { 2350 i = (config.inline8087) ? (3 + cmporder87(e.EV.E2)) : 2; 2351 } 2352 } 2353 else if (tyuns(tym) || tyuns(e.EV.E2.Ety)) 2354 i = 1; 2355 else if (tyintegral(tym) || typtr(tym)) 2356 i = 0; 2357 else 2358 { 2359 debug 2360 elem_print(e); 2361 WRTYxx(tym); 2362 assert(0); 2363 } 2364 2365 jp = jops[i][zero][op - OPle]; /* table starts with OPle */ 2366 2367 /* Try to rewrite uint comparisons so they rely on just the Carry flag 2368 */ 2369 if (i == 1 && (jp == JA || jp == JBE) && 2370 (e.EV.E2.Eoper != OPconst && e.EV.E2.Eoper != OPrelconst)) 2371 { 2372 jp = (jp == JA) ? JC : JNC; 2373 } 2374 2375 L1: 2376 debug 2377 if ((jp & 0xF0) != 0x70) 2378 { 2379 WROP(op); 2380 printf("i %d zero %d op x%x jp x%x\n",i,zero,op,jp); 2381 } 2382 2383 assert((jp & 0xF0) == 0x70); 2384 return jp; 2385 } 2386 2387 /********************************** 2388 * Append code to cdb which validates pointer described by 2389 * addressing mode in *pcs. Modify addressing mode in *pcs. 
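 * The address being checked is pushed (together with its segment in 16-bit models),
 * *pcs is rewritten to a plain 0[reg] form, and __ptrchk is called.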
2390 * Params: 2391 * cdb = append generated code to this 2392 * pcs = original addressing mode to be updated 2393 * keepmsk = mask of registers we must not destroy or use 2394 * if (keepmsk & RMstore), this will be only a store operation 2395 * into the lvalue 2396 */ 2397 2398 void cod3_ptrchk(ref CodeBuilder cdb,code *pcs,regm_t keepmsk) 2399 { 2400 ubyte sib; 2401 reg_t reg; 2402 uint flagsave; 2403 2404 assert(!I64); 2405 if (!I16 && pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs)) 2406 return; // not designed to deal with 48 bit far pointers 2407 2408 ubyte rm = pcs.Irm; 2409 assert(!(rm & 0x40)); // no disp8 or reg addressing modes 2410 2411 // If the addressing mode is already a register 2412 reg = rm & 7; 2413 if (I16) 2414 { static immutable ubyte[8] imode = [ BP,BP,BP,BP,SI,DI,BP,BX ]; 2415 2416 reg = imode[reg]; // convert [SI] to SI, etc. 2417 } 2418 regm_t idxregs = mask(reg); 2419 if ((rm & 0x80 && (pcs.IFL1 != FLoffset || pcs.IEV1.Vuns)) || 2420 !(idxregs & ALLREGS) 2421 ) 2422 { 2423 // Load the offset into a register, so we can push the address 2424 regm_t idxregs2 = (I16 ? IDXREGS : ALLREGS) & ~keepmsk; // only these can be index regs 2425 assert(idxregs2); 2426 allocreg(cdb,&idxregs2,®,TYoffset); 2427 2428 const opsave = pcs.Iop; 2429 flagsave = pcs.Iflags; 2430 pcs.Iop = LEA; 2431 pcs.Irm |= modregrm(0,reg,0); 2432 pcs.Iflags &= ~(CFopsize | CFss | CFes | CFcs); // no prefix bytes needed 2433 cdb.gen(pcs); // LEA reg,EA 2434 2435 pcs.Iflags = flagsave; 2436 pcs.Iop = opsave; 2437 } 2438 2439 // registers destroyed by the function call 2440 //used = (mBP | ALLREGS | mES) & ~fregsaved; 2441 regm_t used = 0; // much less code generated this way 2442 2443 code *cs2 = null; 2444 regm_t tosave = used & (keepmsk | idxregs); 2445 for (int i = 0; tosave; i++) 2446 { 2447 regm_t mi = mask(i); 2448 2449 assert(i < REGMAX); 2450 if (mi & tosave) /* i = register to save */ 2451 { 2452 int push,pop; 2453 2454 stackchanged = 1; 2455 if (i == ES) 2456 { push = 0x06; 2457 pop = 0x07; 2458 } 2459 else 2460 { push = 0x50 + i; 2461 pop = push | 8; 2462 } 2463 cdb.gen1(push); // PUSH i 2464 cs2 = cat(gen1(null,pop),cs2); // POP i 2465 tosave &= ~mi; 2466 } 2467 } 2468 2469 // For 16 bit models, push a far pointer 2470 if (I16) 2471 { 2472 int segreg; 2473 2474 switch (pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs)) 2475 { case CFes: segreg = 0x06; break; 2476 case CFss: segreg = 0x16; break; 2477 case CFcs: segreg = 0x0E; break; 2478 case 0: segreg = 0x1E; break; // DS 2479 default: 2480 assert(0); 2481 } 2482 2483 // See if we should default to SS: 2484 // (Happens when BP is part of the addressing mode) 2485 if (segreg == 0x1E && (rm & 0xC0) != 0xC0 && 2486 rm & 2 && (rm & 7) != 7) 2487 { 2488 segreg = 0x16; 2489 if (config.wflags & WFssneds) 2490 pcs.Iflags |= CFss; // because BP won't be there anymore 2491 } 2492 cdb.gen1(segreg); // PUSH segreg 2493 } 2494 2495 cdb.gen1(0x50 + reg); // PUSH reg 2496 2497 // Rewrite the addressing mode in *pcs so it is just 0[reg] 2498 setaddrmode(pcs, idxregs); 2499 pcs.IFL1 = FLoffset; 2500 pcs.IEV1.Vuns = 0; 2501 2502 // Call the validation function 2503 { 2504 makeitextern(getRtlsym(RTLSYM_PTRCHK)); 2505 2506 used &= ~(keepmsk | idxregs); // regs destroyed by this exercise 2507 getregs(cdb,used); 2508 // CALL __ptrchk 2509 cdb.gencs((LARGECODE) ? 
0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM_PTRCHK)); 2510 } 2511 2512 cdb.append(cs2); 2513 } 2514 2515 /*********************************** 2516 * Determine if BP can be used as a general purpose register. 2517 * Note parallels between this routine and prolog(). 2518 * Returns: 2519 * 0 can't be used, needed for frame 2520 * mBP can be used 2521 */ 2522 2523 regm_t cod3_useBP() 2524 { 2525 tym_t tym; 2526 tym_t tyf; 2527 2528 // Note that DOSX memory model cannot use EBP as a general purpose 2529 // register, as SS != DS. 2530 if (!(config.exe & EX_flat) || config.flags & (CFGalwaysframe | CFGnoebp)) 2531 goto Lcant; 2532 2533 if (anyiasm) 2534 goto Lcant; 2535 2536 tyf = funcsym_p.ty(); 2537 if (tyf & mTYnaked) // if no prolog/epilog for function 2538 goto Lcant; 2539 2540 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh) 2541 { 2542 goto Lcant; // need consistent stack frame 2543 } 2544 2545 tym = tybasic(tyf); 2546 if (tym == TYifunc) 2547 goto Lcant; 2548 2549 stackoffsets(globsym, true); // estimate stack offsets 2550 localsize = Auto.offset + Fast.offset; // an estimate only 2551 // if (localsize) 2552 { 2553 if (!(config.flags4 & CFG4speed) || 2554 config.target_cpu < TARGET_Pentium || 2555 tyfarfunc(tym) || 2556 config.flags & CFGstack || 2557 localsize >= 0x100 || // arbitrary value < 0x1000 2558 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) || 2559 calledFinally || 2560 Alloca.size 2561 ) 2562 goto Lcant; 2563 } 2564 return mBP; 2565 2566 Lcant: 2567 return 0; 2568 } 2569 2570 /************************************************* 2571 * Generate code segment to be used later to restore a cse 2572 */ 2573 2574 bool cse_simple(code *c, elem *e) 2575 { 2576 regm_t regm; 2577 reg_t reg; 2578 int sz = tysize(e.Ety); 2579 2580 if (!I16 && // don't bother with 16 bit code 2581 e.Eoper == OPadd && 2582 sz == REGSIZE && 2583 e.EV.E2.Eoper == OPconst && 2584 e.EV.E1.Eoper == OPvar && 2585 isregvar(e.EV.E1,®m,®) && 2586 !(e.EV.E1.EV.Vsym.Sflags & SFLspill) 2587 ) 2588 { 2589 memset(c,0,(*c).sizeof); 2590 2591 // Make this an LEA instruction 2592 c.Iop = LEA; 2593 buildEA(c,reg,-1,1,e.EV.E2.EV.Vuns); 2594 if (I64) 2595 { if (sz == 8) 2596 c.Irex |= REX_W; 2597 } 2598 2599 return true; 2600 } 2601 else if (e.Eoper == OPind && 2602 sz <= REGSIZE && 2603 e.EV.E1.Eoper == OPvar && 2604 isregvar(e.EV.E1,®m,®) && 2605 (I32 || I64 || regm & IDXREGS) && 2606 !(e.EV.E1.EV.Vsym.Sflags & SFLspill) 2607 ) 2608 { 2609 memset(c,0,(*c).sizeof); 2610 2611 // Make this a MOV instruction 2612 c.Iop = (sz == 1) ? 0x8A : 0x8B; // MOV reg,EA 2613 buildEA(c,reg,-1,1,0); 2614 if (sz == 2 && I32) 2615 c.Iflags |= CFopsize; 2616 else if (I64) 2617 { if (sz == 8) 2618 c.Irex |= REX_W; 2619 } 2620 2621 return true; 2622 } 2623 return false; 2624 } 2625 2626 /************************** 2627 * Store `reg` to the common subexpression save area in index `slot`. 2628 * Params: 2629 * cdb = where to write code to 2630 * tym = type of value that's in `reg` 2631 * reg = register to save 2632 * slot = index into common subexpression save area 2633 */ 2634 void gen_storecse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot) 2635 { 2636 // MOV slot[BP],reg 2637 if (isXMMreg(reg) && config.fpxmmregs) // watch out for ES 2638 { 2639 const aligned = tyvector(tym) ? 
STACKALIGN >= 16 : true; 2640 const op = xmmstore(tym, aligned); 2641 cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot); 2642 return; 2643 } 2644 opcode_t op = STO; // normal mov 2645 if (reg == ES) 2646 { 2647 reg = 0; // the real reg number 2648 op = 0x8C; // segment reg mov 2649 } 2650 cdb.genc1(op,modregxrm(2, reg, BPRM),FLcs,cast(targ_uns)slot); 2651 if (I64) 2652 code_orrex(cdb.last(), REX_W); 2653 } 2654 2655 void gen_testcse(ref CodeBuilder cdb, tym_t tym, uint sz, size_t slot) 2656 { 2657 // CMP slot[BP],0 2658 cdb.genc(sz == 1 ? 0x80 : 0x81,modregrm(2,7,BPRM), 2659 FLcs,cast(targ_uns)slot, FLconst,cast(targ_uns) 0); 2660 if ((I64 || I32) && sz == 2) 2661 cdb.last().Iflags |= CFopsize; 2662 if (I64 && sz == 8) 2663 code_orrex(cdb.last(), REX_W); 2664 } 2665 2666 void gen_loadcse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot) 2667 { 2668 // MOV reg,slot[BP] 2669 if (isXMMreg(reg) && config.fpxmmregs) 2670 { 2671 const aligned = tyvector(tym) ? STACKALIGN >= 16 : true; 2672 const op = xmmload(tym, aligned); 2673 cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot); 2674 return; 2675 } 2676 opcode_t op = LOD; 2677 if (reg == ES) 2678 { 2679 op = 0x8E; 2680 reg = 0; 2681 } 2682 cdb.genc1(op,modregxrm(2,reg,BPRM),FLcs,cast(targ_uns)slot); 2683 if (I64) 2684 code_orrex(cdb.last(), REX_W); 2685 } 2686 2687 /*************************************** 2688 * Gen code for OPframeptr 2689 */ 2690 2691 void cdframeptr(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 2692 { 2693 regm_t retregs = *pretregs & allregs; 2694 if (!retregs) 2695 retregs = allregs; 2696 reg_t reg; 2697 allocreg(cdb,&retregs, ®, TYint); 2698 2699 code cs; 2700 cs.Iop = ESCAPE | ESCframeptr; 2701 cs.Iflags = 0; 2702 cs.Irex = 0; 2703 cs.Irm = cast(ubyte)reg; 2704 cdb.gen(&cs); 2705 fixresult(cdb,e,retregs,pretregs); 2706 } 2707 2708 /*************************************** 2709 * Gen code for load of _GLOBAL_OFFSET_TABLE_. 2710 * This value gets cached in the local variable 'localgot'. 2711 */ 2712 2713 void cdgot(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 2714 { 2715 if (config.exe & (EX_OSX | EX_OSX64)) 2716 { 2717 regm_t retregs = *pretregs & allregs; 2718 if (!retregs) 2719 retregs = allregs; 2720 reg_t reg; 2721 allocreg(cdb,&retregs, ®, TYnptr); 2722 2723 cdb.genc(CALL,0,0,0,FLgot,0); // CALL L1 2724 cdb.gen1(0x58 + reg); // L1: POP reg 2725 2726 fixresult(cdb,e,retregs,pretregs); 2727 } 2728 else if (config.exe & EX_posix) 2729 { 2730 regm_t retregs = *pretregs & allregs; 2731 if (!retregs) 2732 retregs = allregs; 2733 reg_t reg; 2734 allocreg(cdb,&retregs, ®, TYnptr); 2735 2736 cdb.genc2(CALL,0,0); // CALL L1 2737 cdb.gen1(0x58 + reg); // L1: POP reg 2738 2739 // ADD reg,_GLOBAL_OFFSET_TABLE_+3 2740 Symbol *gotsym = Obj.getGOTsym(); 2741 cdb.gencs(0x81,modregrm(3,0,reg),FLextern,gotsym); 2742 /* Because the 2:3 offset from L1: is hardcoded, 2743 * this sequence of instructions must not 2744 * have any instructions in between, 2745 * so set CFvolatile to prevent the scheduler from rearranging it. 2746 */ 2747 code *cgot = cdb.last(); 2748 cgot.Iflags = CFoff | CFvolatile; 2749 cgot.IEV2.Voffset = (reg == AX) ? 2 : 3; 2750 2751 makeitextern(gotsym); 2752 fixresult(cdb,e,retregs,pretregs); 2753 } 2754 else 2755 assert(0); 2756 } 2757 2758 /************************************************** 2759 * Load contents of localgot into EBX. 
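 * (EBX is conventionally the PIC base register in the 32-bit System V ABI,
 * which is why the value is forced into mBX below.)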
2760 */ 2761 2762 void load_localgot(ref CodeBuilder cdb) 2763 { 2764 if (config.exe & (EX_LINUX | EX_FREEBSD | EX_OPENBSD | EX_SOLARIS)) // note: I32 only 2765 { 2766 if (config.flags3 & CFG3pic) 2767 { 2768 if (localgot && !(localgot.Sflags & SFLdead)) 2769 { 2770 localgot.Sflags &= ~GTregcand; // because this hack doesn't work with reg allocator 2771 elem *e = el_var(localgot); 2772 regm_t retregs = mBX; 2773 codelem(cdb,e,&retregs,false); 2774 el_free(e); 2775 } 2776 else 2777 { 2778 elem *e = el_long(TYnptr, 0); 2779 e.Eoper = OPgot; 2780 regm_t retregs = mBX; 2781 codelem(cdb,e,&retregs,false); 2782 el_free(e); 2783 } 2784 } 2785 } 2786 } 2787 2788 /***************************** 2789 * Returns: 2790 * # of bytes stored 2791 */ 2792 2793 2794 int obj_namestring(char *p,const(char)* name) 2795 { 2796 size_t len = strlen(name); 2797 if (len > 255) 2798 { 2799 short *ps = cast(short *)p; 2800 p[0] = 0xFF; 2801 p[1] = 0; 2802 ps[1] = cast(short)len; 2803 memcpy(p + 4,name,len); 2804 const int ONS_OHD = 4; // max # of extra bytes added by obj_namestring() 2805 len += ONS_OHD; 2806 } 2807 else 2808 { 2809 p[0] = cast(char)len; 2810 memcpy(p + 1,name,len); 2811 len++; 2812 } 2813 return cast(int)len; 2814 } 2815 2816 void genregs(ref CodeBuilder cdb,opcode_t op,uint dstreg,uint srcreg) 2817 { 2818 return cdb.gen2(op,modregxrmx(3,dstreg,srcreg)); 2819 } 2820 2821 void gentstreg(ref CodeBuilder cdb, uint t) 2822 { 2823 cdb.gen2(0x85,modregxrmx(3,t,t)); // TEST t,t 2824 code_orflag(cdb.last(),CFpsw); 2825 } 2826 2827 void genpush(ref CodeBuilder cdb, reg_t reg) 2828 { 2829 cdb.gen1(0x50 + (reg & 7)); 2830 if (reg & 8) 2831 code_orrex(cdb.last(), REX_B); 2832 } 2833 2834 void genpop(ref CodeBuilder cdb, reg_t reg) 2835 { 2836 cdb.gen1(0x58 + (reg & 7)); 2837 if (reg & 8) 2838 code_orrex(cdb.last(), REX_B); 2839 } 2840 2841 /************************** 2842 * Generate a MOV to,from register instruction. 2843 * Smart enough to dump redundant register moves, and segment 2844 * register moves. 2845 */ 2846 2847 code *genmovreg(uint to,uint from) 2848 { 2849 CodeBuilder cdb; cdb.ctor(); 2850 genmovreg(cdb, to, from); 2851 return cdb.finish(); 2852 } 2853 2854 void genmovreg(ref CodeBuilder cdb,uint to,uint from) 2855 { 2856 genmovreg(cdb, to, from, TYMAX); 2857 } 2858 2859 void genmovreg(ref CodeBuilder cdb, uint to, uint from, tym_t tym) 2860 { 2861 // register kind. ex: GPR,XMM,SEG 2862 static uint _K(uint reg) 2863 { 2864 switch (reg) 2865 { 2866 case ES: return ES; 2867 case XMM15: 2868 case XMM0: .. case XMM7: return XMM0; 2869 case AX: .. 
case R15: return AX; 2870 default: return reg; 2871 } 2872 } 2873 2874 // kind combination (order kept) 2875 static uint _X(uint to, uint from) { return (_K(to) << 8) + _K(from); } 2876 2877 if (to != from) 2878 { 2879 if (tym == TYMAX) tym = TYsize_t; // avoid register slicing 2880 switch (_X(to, from)) 2881 { 2882 case _X(AX, AX): 2883 genregs(cdb, 0x89, from, to); // MOV to,from 2884 if (I64 && tysize(tym) >= 8) 2885 code_orrex(cdb.last(), REX_W); 2886 break; 2887 2888 case _X(XMM0, XMM0): // MOVD/Q to,from 2889 genregs(cdb, xmmload(tym), to-XMM0, from-XMM0); 2890 checkSetVex(cdb.last(), tym); 2891 break; 2892 2893 case _X(AX, XMM0): // MOVD/Q to,from 2894 genregs(cdb, STOD, from-XMM0, to); 2895 if (I64 && tysize(tym) >= 8) 2896 code_orrex(cdb.last(), REX_W); 2897 checkSetVex(cdb.last(), tym); 2898 break; 2899 2900 case _X(XMM0, AX): // MOVD/Q to,from 2901 genregs(cdb, LODD, to-XMM0, from); 2902 if (I64 && tysize(tym) >= 8) 2903 code_orrex(cdb.last(), REX_W); 2904 checkSetVex(cdb.last(), tym); 2905 break; 2906 2907 case _X(ES, AX): 2908 assert(tysize(tym) <= REGSIZE); 2909 genregs(cdb, 0x8E, 0, from); 2910 break; 2911 2912 case _X(AX, ES): 2913 assert(tysize(tym) <= REGSIZE); 2914 genregs(cdb, 0x8C, 0, to); 2915 break; 2916 2917 default: 2918 debug printf("genmovreg(to = %s, from = %s)\n" 2919 , regm_str(mask(to)), regm_str(mask(from))); 2920 assert(0); 2921 } 2922 } 2923 } 2924 2925 /*************************************** 2926 * Generate immediate multiply instruction for r1=r2*imm. 2927 * Optimize it into LEA's if we can. 2928 */ 2929 2930 void genmulimm(ref CodeBuilder cdb,uint r1,uint r2,targ_int imm) 2931 { 2932 // These optimizations should probably be put into pinholeopt() 2933 switch (imm) 2934 { 2935 case 1: 2936 genmovreg(cdb,r1,r2); 2937 break; 2938 2939 case 5: 2940 { 2941 code cs; 2942 cs.Iop = LEA; 2943 cs.Iflags = 0; 2944 cs.Irex = 0; 2945 buildEA(&cs,r2,r2,4,0); 2946 cs.orReg(r1); 2947 cdb.gen(&cs); 2948 break; 2949 } 2950 2951 default: 2952 cdb.genc2(0x69,modregxrmx(3,r1,r2),imm); // IMUL r1,r2,imm 2953 break; 2954 } 2955 } 2956 2957 /****************************** 2958 * Load CX with the value of _AHSHIFT. 2959 */ 2960 2961 void genshift(ref CodeBuilder cdb) 2962 { 2963 version (SCPP) 2964 { 2965 // Set up ahshift to trick ourselves into giving the right fixup, 2966 // which must be seg-relative, external frame, external target. 2967 cdb.gencs(0xC7,modregrm(3,0,CX),FLfunc,getRtlsym(RTLSYM_AHSHIFT)); 2968 cdb.last().Iflags |= CFoff; 2969 } 2970 else 2971 assert(0); 2972 } 2973 2974 /****************************** 2975 * Move constant value into reg. 2976 * Take advantage of existing values in registers. 
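 * Known register constants are tracked in regcon.immed and recorded with
 * regimmed_set(), so a value already present can be reused or copied from
 * another register instead of being reloaded.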
2977 * If flags & mPSW 2978 * set flags based on result 2979 * Else if flags & 8 2980 * do not disturb flags 2981 * Else 2982 * don't care about flags 2983 * If flags & 1 then byte move 2984 * If flags & 2 then short move (for I32 and I64) 2985 * If flags & 4 then don't disturb unused portion of register 2986 * If flags & 16 then reg is a byte register AL..BH 2987 * If flags & 64 (0x40) then 64 bit move (I64 only) 2988 * Returns: 2989 * code (if any) generated 2990 */ 2991 2992 void movregconst(ref CodeBuilder cdb,reg_t reg,targ_size_t value,regm_t flags) 2993 { 2994 reg_t r; 2995 regm_t mreg; 2996 2997 //printf("movregconst(reg=%s, value= %lld (%llx), flags=%x)\n", regm_str(mask(reg)), value, value, flags); 2998 2999 regm_t regm = regcon.immed.mval & mask(reg); 3000 targ_size_t regv = regcon.immed.value[reg]; 3001 3002 if (flags & 1) // 8 bits 3003 { 3004 value &= 0xFF; 3005 regm &= BYTEREGS; 3006 3007 // If we already have the right value in the right register 3008 if (regm && (regv & 0xFF) == value) 3009 goto L2; 3010 3011 if (flags & 16 && reg & 4 && // if an H byte register 3012 regcon.immed.mval & mask(reg & 3) && 3013 (((regv = regcon.immed.value[reg & 3]) >> 8) & 0xFF) == value) 3014 goto L2; 3015 3016 /* Avoid byte register loads to avoid dependency stalls. 3017 */ 3018 if ((I32 || I64) && 3019 config.target_cpu >= TARGET_PentiumPro && !(flags & 4)) 3020 goto L3; 3021 3022 // See if another register has the right value 3023 r = 0; 3024 for (mreg = (regcon.immed.mval & BYTEREGS); mreg; mreg >>= 1) 3025 { 3026 if (mreg & 1) 3027 { 3028 if ((regcon.immed.value[r] & 0xFF) == value) 3029 { 3030 genregs(cdb,0x8A,reg,r); // MOV regL,rL 3031 if (I64 && reg >= 4 || r >= 4) 3032 code_orrex(cdb.last(), REX); 3033 goto L2; 3034 } 3035 if (!(I64 && reg >= 4) && 3036 r < 4 && ((regcon.immed.value[r] >> 8) & 0xFF) == value) 3037 { 3038 genregs(cdb,0x8A,reg,r | 4); // MOV regL,rH 3039 goto L2; 3040 } 3041 } 3042 r++; 3043 } 3044 3045 if (value == 0 && !(flags & 8)) 3046 { 3047 if (!(flags & 4) && // if we can set the whole register 3048 !(flags & 16 && reg & 4)) // and reg is not an H register 3049 { 3050 genregs(cdb,0x31,reg,reg); // XOR reg,reg 3051 regimmed_set(reg,value); 3052 regv = 0; 3053 } 3054 else 3055 genregs(cdb,0x30,reg,reg); // XOR regL,regL 3056 flags &= ~mPSW; // flags already set by XOR 3057 } 3058 else 3059 { 3060 cdb.genc2(0xC6,modregrmx(3,0,reg),value); // MOV regL,value 3061 if (reg >= 4 && I64) 3062 { 3063 code_orrex(cdb.last(), REX); 3064 } 3065 } 3066 L2: 3067 if (flags & mPSW) 3068 genregs(cdb,0x84,reg,reg); // TEST regL,regL 3069 3070 if (regm) 3071 // Set just the 'L' part of the register value 3072 regimmed_set(reg,(regv & ~cast(targ_size_t)0xFF) | value); 3073 else if (flags & 16 && reg & 4 && regcon.immed.mval & mask(reg & 3)) 3074 // Set just the 'H' part of the register value 3075 regimmed_set((reg & 3),(regv & ~cast(targ_size_t)0xFF00) | (value << 8)); 3076 return; 3077 } 3078 L3: 3079 if (I16) 3080 value = cast(targ_short) value; // sign-extend MSW 3081 else if (I32) 3082 value = cast(targ_int) value; 3083 3084 if (!I16 && flags & 2) // load 16 bit value 3085 { 3086 value &= 0xFFFF; 3087 if (value && !(flags & mPSW)) 3088 { 3089 cdb.genc2(0xC7,modregrmx(3,0,reg),value); // MOV reg,value 3090 regimmed_set(reg, value); 3091 return; 3092 } 3093 } 3094 3095 // If we already have the right value in the right register 3096 if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64)) 3097 { 3098 if (flags & mPSW) 3099 gentstreg(cdb,reg); 3100 } 3101 else if 
(flags & 64 && regm && regv == value) 3102 { // Look at the full 64 bits 3103 if (flags & mPSW) 3104 { 3105 gentstreg(cdb,reg); 3106 code_orrex(cdb.last(), REX_W); 3107 } 3108 } 3109 else 3110 { 3111 if (flags & mPSW) 3112 { 3113 switch (value) 3114 { 3115 case 0: 3116 genregs(cdb,0x31,reg,reg); 3117 break; 3118 3119 case 1: 3120 if (I64) 3121 goto L4; 3122 genregs(cdb,0x31,reg,reg); 3123 goto inc; 3124 3125 case ~cast(targ_size_t)0: 3126 if (I64) 3127 goto L4; 3128 genregs(cdb,0x31,reg,reg); 3129 goto dec; 3130 3131 default: 3132 L4: 3133 if (flags & 64) 3134 { 3135 cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64 3136 gentstreg(cdb,reg); 3137 code_orrex(cdb.last(), REX_W); 3138 } 3139 else 3140 { 3141 value &= 0xFFFFFFFF; 3142 cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value 3143 gentstreg(cdb,reg); 3144 } 3145 break; 3146 } 3147 } 3148 else 3149 { 3150 // Look for single byte conversion 3151 if (regcon.immed.mval & mAX) 3152 { 3153 if (I32) 3154 { 3155 if (reg == AX && value == cast(targ_short) regv) 3156 { 3157 cdb.gen1(0x98); // CWDE 3158 goto done; 3159 } 3160 if (reg == DX && 3161 value == (regcon.immed.value[AX] & 0x80000000 ? 0xFFFFFFFF : 0) && 3162 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium) 3163 ) 3164 { 3165 cdb.gen1(0x99); // CDQ 3166 goto done; 3167 } 3168 } 3169 else if (I16) 3170 { 3171 if (reg == AX && 3172 cast(targ_short) value == cast(byte) regv) 3173 { 3174 cdb.gen1(0x98); // CBW 3175 goto done; 3176 } 3177 3178 if (reg == DX && 3179 cast(targ_short) value == (regcon.immed.value[AX] & 0x8000 ? cast(targ_short) 0xFFFF : cast(targ_short) 0) && 3180 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium) 3181 ) 3182 { 3183 cdb.gen1(0x99); // CWD 3184 goto done; 3185 } 3186 } 3187 } 3188 if (value == 0 && !(flags & 8) && config.target_cpu >= TARGET_80486) 3189 { 3190 genregs(cdb,0x31,reg,reg); // XOR reg,reg 3191 goto done; 3192 } 3193 3194 if (!I64 && regm && !(flags & 8)) 3195 { 3196 if (regv + 1 == value || 3197 // Catch case of (0xFFFF+1 == 0) for 16 bit compiles 3198 (I16 && cast(targ_short)(regv + 1) == cast(targ_short)value)) 3199 { 3200 inc: 3201 cdb.gen1(0x40 + reg); // INC reg 3202 goto done; 3203 } 3204 if (regv - 1 == value) 3205 { 3206 dec: 3207 cdb.gen1(0x48 + reg); // DEC reg 3208 goto done; 3209 } 3210 } 3211 3212 // See if another register has the right value 3213 r = 0; 3214 for (mreg = regcon.immed.mval; mreg; mreg >>= 1) 3215 { 3216 debug 3217 assert(!I16 || regcon.immed.value[r] == cast(targ_short)regcon.immed.value[r]); 3218 3219 if (mreg & 1 && regcon.immed.value[r] == value) 3220 { 3221 genmovreg(cdb,reg,r); 3222 goto done; 3223 } 3224 r++; 3225 } 3226 3227 if (value == 0 && !(flags & 8)) 3228 { 3229 genregs(cdb,0x31,reg,reg); // XOR reg,reg 3230 } 3231 else 3232 { // See if we can just load a byte 3233 if (regm & BYTEREGS && 3234 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_PentiumPro) 3235 ) 3236 { 3237 if ((regv & ~cast(targ_size_t)0xFF) == (value & ~cast(targ_size_t)0xFF)) 3238 { 3239 movregconst(cdb,reg,value,(flags & 8) |4|1); // load regL 3240 return; 3241 } 3242 if (regm & (mAX|mBX|mCX|mDX) && 3243 (regv & ~cast(targ_size_t)0xFF00) == (value & ~cast(targ_size_t)0xFF00) && 3244 !I64) 3245 { 3246 movregconst(cdb,4|reg,value >> 8,(flags & 8) |4|1|16); // load regH 3247 return; 3248 } 3249 } 3250 if (flags & 64) 3251 cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64 3252 else 3253 { 3254 value &= 0xFFFFFFFF; 3255 
cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value 3256 } 3257 } 3258 } 3259 done: 3260 regimmed_set(reg,value); 3261 } 3262 } 3263 3264 /************************** 3265 * Generate a jump instruction. 3266 */ 3267 3268 void genjmp(ref CodeBuilder cdb,opcode_t op,uint fltarg,block *targ) 3269 { 3270 code cs; 3271 cs.Iop = op & 0xFF; 3272 cs.Iflags = 0; 3273 cs.Irex = 0; 3274 if (op != JMP && op != 0xE8) // if not already long branch 3275 cs.Iflags = CFjmp16; // assume long branch for op = 0x7x 3276 cs.IFL2 = cast(ubyte)fltarg; // FLblock (or FLcode) 3277 cs.IEV2.Vblock = targ; // target block (or code) 3278 if (fltarg == FLcode) 3279 (cast(code *)targ).Iflags |= CFtarg; 3280 3281 if (config.flags4 & CFG4fastfloat) // if fast floating point 3282 { 3283 cdb.gen(&cs); 3284 return; 3285 } 3286 3287 switch (op & 0xFF00) // look at second jump opcode 3288 { 3289 // The JP and JNP come from floating point comparisons 3290 case JP << 8: 3291 cdb.gen(&cs); 3292 cs.Iop = JP; 3293 cdb.gen(&cs); 3294 break; 3295 3296 case JNP << 8: 3297 { 3298 // Do a JP around the jump instruction 3299 code *cnop = gennop(null); 3300 genjmp(cdb,JP,FLcode,cast(block *) cnop); 3301 cdb.gen(&cs); 3302 cdb.append(cnop); 3303 break; 3304 } 3305 3306 case 1 << 8: // toggled no jump 3307 case 0 << 8: 3308 cdb.gen(&cs); 3309 break; 3310 3311 default: 3312 debug 3313 printf("jop = x%x\n",op); 3314 assert(0); 3315 } 3316 } 3317 3318 /********************************************* 3319 * Generate first part of prolog for interrupt function. 3320 */ 3321 void prolog_ifunc(ref CodeBuilder cdb, tym_t* tyf) 3322 { 3323 static immutable ubyte[4] ops2 = [ 0x60,0x1E,0x06,0 ]; 3324 static immutable ubyte[11] ops0 = [ 0x50,0x51,0x52,0x53, 3325 0x54,0x55,0x56,0x57, 3326 0x1E,0x06,0 ]; 3327 3328 immutable(ubyte)* p = (config.target_cpu >= TARGET_80286) ? 
ops2.ptr : ops0.ptr; 3329 do 3330 cdb.gen1(*p); 3331 while (*++p); 3332 3333 genregs(cdb,0x8B,BP,SP); // MOV BP,SP 3334 if (localsize) 3335 cod3_stackadj(cdb, cast(int)localsize); 3336 3337 *tyf |= mTYloadds; 3338 } 3339 3340 void prolog_ifunc2(ref CodeBuilder cdb, tym_t tyf, tym_t tym, bool pushds) 3341 { 3342 /* Determine if we need to reload DS */ 3343 if (tyf & mTYloadds) 3344 { 3345 if (!pushds) // if not already pushed 3346 cdb.gen1(0x1E); // PUSH DS 3347 spoff += _tysize[TYint]; 3348 cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0); // MOV AX,DGROUP 3349 code *c = cdb.last(); 3350 c.IEV2.Vseg = DATA; 3351 c.Iflags ^= CFseg | CFoff; // turn off CFoff, on CFseg 3352 cdb.gen2(0x8E,modregrm(3,3,AX)); // MOV DS,AX 3353 useregs(mAX); 3354 } 3355 3356 if (tym == TYifunc) 3357 cdb.gen1(0xFC); // CLD 3358 } 3359 3360 void prolog_16bit_windows_farfunc(ref CodeBuilder cdb, tym_t* tyf, bool* pushds) 3361 { 3362 int wflags = config.wflags; 3363 if (wflags & WFreduced && !(*tyf & mTYexport)) 3364 { // reduced prolog/epilog for non-exported functions 3365 wflags &= ~(WFdgroup | WFds | WFss); 3366 } 3367 3368 getregsNoSave(mAX); // should not have any value in AX 3369 3370 int segreg; 3371 switch (wflags & (WFdgroup | WFds | WFss)) 3372 { 3373 case WFdgroup: // MOV AX,DGROUP 3374 { 3375 if (wflags & WFreduced) 3376 *tyf &= ~mTYloadds; // remove redundancy 3377 cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0); 3378 code *c = cdb.last(); 3379 c.IEV2.Vseg = DATA; 3380 c.Iflags ^= CFseg | CFoff; // turn off CFoff, on CFseg 3381 break; 3382 } 3383 3384 case WFss: 3385 segreg = 2; // SS 3386 goto Lmovax; 3387 3388 case WFds: 3389 segreg = 3; // DS 3390 Lmovax: 3391 cdb.gen2(0x8C,modregrm(3,segreg,AX)); // MOV AX,segreg 3392 if (wflags & WFds) 3393 cdb.gen1(0x90); // NOP 3394 break; 3395 3396 case 0: 3397 break; 3398 3399 default: 3400 debug 3401 printf("config.wflags = x%x\n",config.wflags); 3402 assert(0); 3403 } 3404 if (wflags & WFincbp) 3405 cdb.gen1(0x40 + BP); // INC BP 3406 cdb.gen1(0x50 + BP); // PUSH BP 3407 genregs(cdb,0x8B,BP,SP); // MOV BP,SP 3408 if (wflags & (WFsaveds | WFds | WFss | WFdgroup)) 3409 { 3410 cdb.gen1(0x1E); // PUSH DS 3411 *pushds = true; 3412 BPoff = -REGSIZE; 3413 } 3414 if (wflags & (WFds | WFss | WFdgroup)) 3415 cdb.gen2(0x8E,modregrm(3,3,AX)); // MOV DS,AX 3416 } 3417 3418 /********************************************** 3419 * Set up frame register. 3420 * Params: 3421 * cdb = write generated code here 3422 * farfunc = true if a far function 3423 * enter = set to true if ENTER instruction can be used, false otherwise 3424 * xlocalsize = amount of local variables, set to amount to be subtracted from stack pointer 3425 * cfa_offset = set to frame pointer's offset from the CFA 3426 * Returns: 3427 * generated code 3428 */ 3429 void prolog_frame(ref CodeBuilder cdb, bool farfunc, ref uint xlocalsize, out bool enter, out int cfa_offset) 3430 { 3431 //printf("prolog_frame\n"); 3432 cfa_offset = 0; 3433 3434 if (0 && config.exe == EX_WIN64) 3435 { 3436 // PUSH RBP 3437 // LEA RBP,0[RSP] 3438 cdb. 
gen1(0x50 + BP); 3439 cdb.genc1(LEA,(REX_W<<16) | (modregrm(0,4,SP)<<8) | modregrm(2,BP,4),FLconst,0); 3440 enter = false; 3441 return; 3442 } 3443 3444 if (config.wflags & WFincbp && farfunc) 3445 cdb.gen1(0x40 + BP); // INC BP 3446 if (config.target_cpu < TARGET_80286 || 3447 config.exe & (EX_posix | EX_WIN64) || 3448 !localsize || 3449 config.flags & CFGstack || 3450 (xlocalsize >= 0x1000 && config.exe & EX_flat) || 3451 localsize >= 0x10000 || 3452 (NTEXCEPTIONS == 2 && 3453 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))) || 3454 (config.target_cpu >= TARGET_80386 && 3455 config.flags4 & CFG4speed) 3456 ) 3457 { 3458 cdb.gen1(0x50 + BP); // PUSH BP 3459 genregs(cdb,0x8B,BP,SP); // MOV BP,SP 3460 if (I64) 3461 code_orrex(cdb.last(), REX_W); // MOV RBP,RSP 3462 if ((config.objfmt & (OBJ_ELF | OBJ_MACH)) && config.fulltypes) 3463 // Don't reorder instructions, as dwarf CFA relies on it 3464 code_orflag(cdb.last(), CFvolatile); 3465 static if (NTEXCEPTIONS == 2) 3466 { 3467 if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH)) 3468 { 3469 nteh_prolog(cdb); 3470 int sz = nteh_contextsym_size(); 3471 assert(sz != 0); // should be 5*4, not 0 3472 xlocalsize -= sz; // sz is already subtracted from ESP 3473 // by nteh_prolog() 3474 } 3475 } 3476 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D || 3477 config.ehmethod == EHmethod.EH_DWARF) 3478 { 3479 int off = 2 * REGSIZE; // 1 for the return address + 1 for the PUSH EBP 3480 dwarf_CFA_set_loc(1); // address after PUSH EBP 3481 dwarf_CFA_set_reg_offset(SP, off); // CFA is now 8[ESP] 3482 dwarf_CFA_offset(BP, -off); // EBP is at 0[ESP] 3483 dwarf_CFA_set_loc(I64 ? 4 : 3); // address after MOV EBP,ESP 3484 /* Oddly, the CFA is not the same as the frame pointer, 3485 * which is why the offset of BP is set to 8 3486 */ 3487 dwarf_CFA_set_reg_offset(BP, off); // CFA is now 0[EBP] 3488 cfa_offset = off; // remember the difference between the CFA and the frame pointer 3489 } 3490 enter = false; /* do not use ENTER instruction */ 3491 } 3492 else 3493 enter = true; 3494 } 3495 3496 /********************************************** 3497 * Enforce stack alignment. 3498 * Input: 3499 * cdb code builder. 3500 * Returns: 3501 * generated code 3502 */ 3503 void prolog_stackalign(ref CodeBuilder cdb) 3504 { 3505 if (!enforcealign) 3506 return; 3507 3508 const offset = (hasframe ? 2 : 1) * REGSIZE; // 1 for the return address + 1 for the PUSH EBP 3509 if (offset & (STACKALIGN - 1) || TARGET_STACKALIGN < STACKALIGN) 3510 cod3_stackalign(cdb, STACKALIGN); 3511 } 3512 3513 void prolog_frameadj(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool enter, bool* pushalloc) 3514 { 3515 uint pushallocreg = (tyf == TYmfunc) ? 
CX : AX; 3516 3517 bool check; 3518 if (config.exe & (EX_LINUX | EX_LINUX64)) 3519 check = false; // seems that Linux doesn't need to fault in stack pages 3520 else 3521 check = (config.flags & CFGstack && !(I32 && xlocalsize < 0x1000)) // if stack overflow check 3522 || (config.exe & (EX_windos & EX_flat) && xlocalsize >= 0x1000); 3523 3524 if (check) 3525 { 3526 if (I16) 3527 { 3528 // BUG: Won't work if parameter is passed in AX 3529 movregconst(cdb,AX,xlocalsize,false); // MOV AX,localsize 3530 makeitextern(getRtlsym(RTLSYM_CHKSTK)); 3531 // CALL _chkstk 3532 cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM_CHKSTK)); 3533 useregs((ALLREGS | mBP | mES) & ~getRtlsym(RTLSYM_CHKSTK).Sregsaved); 3534 } 3535 else 3536 { 3537 /* Watch out for 64 bit code where EDX is passed as a register parameter 3538 */ 3539 reg_t reg = I64 ? R11 : DX; // scratch register 3540 3541 /* MOV EDX, xlocalsize/0x1000 3542 * L1: SUB ESP, 0x1000 3543 * TEST [ESP],ESP 3544 * DEC EDX 3545 * JNE L1 3546 * SUB ESP, xlocalsize % 0x1000 3547 */ 3548 movregconst(cdb, reg, xlocalsize / 0x1000, false); 3549 cod3_stackadj(cdb, 0x1000); 3550 code_orflag(cdb.last(), CFtarg2); 3551 cdb.gen2sib(0x85, modregrm(0,SP,4),modregrm(0,4,SP)); 3552 if (I64) 3553 { cdb.gen2(0xFF, modregrmx(3,1,R11)); // DEC R11D 3554 cdb.genc2(JNE,0,cast(targ_uns)-15); 3555 } 3556 else 3557 { cdb.gen1(0x48 + DX); // DEC EDX 3558 cdb.genc2(JNE,0,cast(targ_uns)-12); 3559 } 3560 regimmed_set(reg,0); // reg is now 0 3561 cod3_stackadj(cdb, xlocalsize & 0xFFF); 3562 useregs(mask(reg)); 3563 } 3564 } 3565 else 3566 { 3567 if (enter) 3568 { // ENTER xlocalsize,0 3569 cdb.genc(ENTER,0,FLconst,xlocalsize,FLconst,cast(targ_uns) 0); 3570 assert(!(config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D)); // didn't emit Dwarf data 3571 } 3572 else if (xlocalsize == REGSIZE && config.flags4 & CFG4optimized) 3573 { 3574 cdb. gen1(0x50 + pushallocreg); // PUSH AX 3575 // Do this to prevent an -x[EBP] to be moved in 3576 // front of the push. 3577 code_orflag(cdb.last(),CFvolatile); 3578 *pushalloc = true; 3579 } 3580 else 3581 cod3_stackadj(cdb, xlocalsize); 3582 } 3583 } 3584 3585 void prolog_frameadj2(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool* pushalloc) 3586 { 3587 uint pushallocreg = (tyf == TYmfunc) ? CX : AX; 3588 if (xlocalsize == REGSIZE) 3589 { 3590 cdb.gen1(0x50 + pushallocreg); // PUSH AX 3591 *pushalloc = true; 3592 } 3593 else if (xlocalsize == 2 * REGSIZE) 3594 { 3595 cdb.gen1(0x50 + pushallocreg); // PUSH AX 3596 cdb.gen1(0x50 + pushallocreg); // PUSH AX 3597 *pushalloc = true; 3598 } 3599 else 3600 cod3_stackadj(cdb, xlocalsize); 3601 } 3602 3603 void prolog_setupalloca(ref CodeBuilder cdb) 3604 { 3605 //printf("prolog_setupalloca() offset x%x size x%x alignment x%x\n", 3606 //cast(int)Alloca.offset, cast(int)Alloca.size, cast(int)Alloca.alignment); 3607 // Set up magic parameter for alloca() 3608 // MOV -REGSIZE[BP],localsize - BPoff 3609 cdb.genc(0xC7,modregrm(2,0,BPRM), 3610 FLconst,Alloca.offset + BPoff, 3611 FLconst,localsize - BPoff); 3612 if (I64) 3613 code_orrex(cdb.last(), REX_W); 3614 } 3615 3616 /************************************** 3617 * Save registers that the function destroys, 3618 * but that the ABI says should be preserved across 3619 * function calls. 3620 * 3621 * Emit Dwarf info for these saves. 
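 * Depending on pushoffuse, the registers are either stored into a preallocated
 * area of the stack frame or pushed individually; XMM registers take 16 bytes
 * either way.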
3622 * Params: 3623 * cdb = append generated instructions to this 3624 * topush = mask of registers to push 3625 * cfa_offset = offset of frame pointer from CFA 3626 */ 3627 3628 void prolog_saveregs(ref CodeBuilder cdb, regm_t topush, int cfa_offset) 3629 { 3630 if (pushoffuse) 3631 { 3632 // Save to preallocated section in the stack frame 3633 int xmmtopush = numbitsset(topush & XMMREGS); // XMM regs take 16 bytes 3634 int gptopush = numbitsset(topush) - xmmtopush; // general purpose registers to save 3635 targ_size_t xmmoffset = pushoff + BPoff; 3636 if (!hasframe || enforcealign) 3637 xmmoffset += EBPtoESP; 3638 targ_size_t gpoffset = xmmoffset + xmmtopush * 16; 3639 while (topush) 3640 { 3641 reg_t reg = findreg(topush); 3642 topush &= ~mask(reg); 3643 if (isXMMreg(reg)) 3644 { 3645 if (hasframe && !enforcealign) 3646 { 3647 // MOVUPD xmmoffset[EBP],xmm 3648 cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset); 3649 } 3650 else 3651 { 3652 // MOVUPD xmmoffset[ESP],xmm 3653 cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset); 3654 } 3655 xmmoffset += 16; 3656 } 3657 else 3658 { 3659 if (hasframe && !enforcealign) 3660 { 3661 // MOV gpoffset[EBP],reg 3662 cdb.genc1(0x89,modregxrm(2,reg,BPRM),FLconst,gpoffset); 3663 } 3664 else 3665 { 3666 // MOV gpoffset[ESP],reg 3667 cdb.genc1(0x89,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset); 3668 } 3669 if (I64) 3670 code_orrex(cdb.last(), REX_W); 3671 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D || 3672 config.ehmethod == EHmethod.EH_DWARF) 3673 { // Emit debug_frame data giving location of saved register 3674 code *c = cdb.finish(); 3675 pinholeopt(c, null); 3676 dwarf_CFA_set_loc(calcblksize(c)); // address after save 3677 dwarf_CFA_offset(reg, cast(int)(gpoffset - cfa_offset)); 3678 cdb.reset(); 3679 cdb.append(c); 3680 } 3681 gpoffset += REGSIZE; 3682 } 3683 } 3684 } 3685 else 3686 { 3687 while (topush) /* while registers to push */ 3688 { 3689 reg_t reg = findreg(topush); 3690 topush &= ~mask(reg); 3691 if (isXMMreg(reg)) 3692 { 3693 // SUB RSP,16 3694 cod3_stackadj(cdb, 16); 3695 // MOVUPD 0[RSP],xmm 3696 cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0); 3697 EBPtoESP += 16; 3698 spoff += 16; 3699 } 3700 else 3701 { 3702 genpush(cdb, reg); 3703 EBPtoESP += REGSIZE; 3704 spoff += REGSIZE; 3705 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D || 3706 config.ehmethod == EHmethod.EH_DWARF) 3707 { // Emit debug_frame data giving location of saved register 3708 // relative to 0[EBP] 3709 code *c = cdb.finish(); 3710 pinholeopt(c, null); 3711 dwarf_CFA_set_loc(calcblksize(c)); // address after PUSH reg 3712 dwarf_CFA_offset(reg, -EBPtoESP - cfa_offset); 3713 cdb.reset(); 3714 cdb.append(c); 3715 } 3716 } 3717 } 3718 } 3719 } 3720 3721 /************************************** 3722 * Undo prolog_saveregs() 3723 */ 3724 3725 private void epilog_restoreregs(ref CodeBuilder cdb, regm_t topop) 3726 { 3727 debug 3728 if (topop & ~(XMMREGS | 0xFFFF)) 3729 printf("fregsaved = %s, mfuncreg = %s\n",regm_str(fregsaved),regm_str(mfuncreg)); 3730 3731 assert(!(topop & ~(XMMREGS | 0xFFFF))); 3732 if (pushoffuse) 3733 { 3734 // Save to preallocated section in the stack frame 3735 int xmmtopop = numbitsset(topop & XMMREGS); // XMM regs take 16 bytes 3736 int gptopop = numbitsset(topop) - xmmtopop; // general purpose registers to save 3737 targ_size_t xmmoffset = pushoff + BPoff; 3738 if (!hasframe || enforcealign) 3739 xmmoffset += EBPtoESP; 3740 
targ_size_t gpoffset = xmmoffset + xmmtopop * 16; 3741 while (topop) 3742 { 3743 reg_t reg = findreg(topop); 3744 topop &= ~mask(reg); 3745 if (isXMMreg(reg)) 3746 { 3747 if (hasframe && !enforcealign) 3748 { 3749 // MOVUPD xmm,xmmoffset[EBP] 3750 cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset); 3751 } 3752 else 3753 { 3754 // MOVUPD xmm,xmmoffset[ESP] 3755 cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset); 3756 } 3757 xmmoffset += 16; 3758 } 3759 else 3760 { 3761 if (hasframe && !enforcealign) 3762 { 3763 // MOV reg,gpoffset[EBP] 3764 cdb.genc1(0x8B,modregxrm(2,reg,BPRM),FLconst,gpoffset); 3765 } 3766 else 3767 { 3768 // MOV reg,gpoffset[ESP] 3769 cdb.genc1(0x8B,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset); 3770 } 3771 if (I64) 3772 code_orrex(cdb.last(), REX_W); 3773 gpoffset += REGSIZE; 3774 } 3775 } 3776 } 3777 else 3778 { 3779 reg_t reg = I64 ? XMM7 : DI; 3780 if (!(topop & XMMREGS)) 3781 reg = R15; 3782 regm_t regm = 1 << reg; 3783 3784 while (topop) 3785 { if (topop & regm) 3786 { 3787 if (isXMMreg(reg)) 3788 { 3789 // MOVUPD xmm,0[RSP] 3790 cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0); 3791 // ADD RSP,16 3792 cod3_stackadj(cdb, -16); 3793 } 3794 else 3795 { 3796 cdb.gen1(0x58 + (reg & 7)); // POP reg 3797 if (reg & 8) 3798 code_orrex(cdb.last(), REX_B); 3799 } 3800 topop &= ~regm; 3801 } 3802 regm >>= 1; 3803 reg--; 3804 } 3805 } 3806 } 3807 3808 version (SCPP) 3809 { 3810 void prolog_trace(ref CodeBuilder cdb, bool farfunc, uint* regsaved) 3811 { 3812 Symbol *s = getRtlsym(farfunc ? RTLSYM_TRACE_PRO_F : RTLSYM_TRACE_PRO_N); 3813 makeitextern(s); 3814 cdb.gencs(I16 ? 0x9A : CALL,0,FLfunc,s); // CALL _trace 3815 if (!I16) 3816 code_orflag(cdb.last(),CFoff | CFselfrel); 3817 /* Embedding the function name inline after the call works, but it 3818 * makes disassembling the code annoying. 3819 */ 3820 static if (ELFOBJ || MACHOBJ) 3821 { 3822 // Generate length prefixed name that is recognized by profiler 3823 size_t len = strlen(funcsym_p.Sident); 3824 char *buffer = cast(char *)malloc(len + 4); 3825 assert(buffer); 3826 if (len <= 254) 3827 { 3828 buffer[0] = len; 3829 memcpy(buffer + 1, funcsym_p.Sident, len); 3830 len++; 3831 } 3832 else 3833 { 3834 buffer[0] = 0xFF; 3835 buffer[1] = 0; 3836 buffer[2] = len & 0xFF; 3837 buffer[3] = len >> 8; 3838 memcpy(buffer + 4, funcsym_p.Sident, len); 3839 len += 4; 3840 } 3841 cdb.genasm(buffer, len); // append func name 3842 free(buffer); 3843 } 3844 else 3845 { 3846 char [IDMAX+IDOHD+1] name = void; 3847 size_t len = objmod.mangle(funcsym_p,name.ptr); 3848 assert(len < name.length); 3849 cdb.genasm(name.ptr,len); // append func name 3850 } 3851 *regsaved = s.Sregsaved; 3852 } 3853 } 3854 3855 /****************************** 3856 * Generate special varargs prolog for Posix 64 bit systems. 3857 * Params: 3858 * cdb = sink for generated code 3859 * sv = symbol for __va_argsave 3860 * namedargs = registers that named parameters (not ... arguments) were passed in. 3861 */ 3862 void prolog_genvarargs(ref CodeBuilder cdb, Symbol* sv, regm_t namedargs) 3863 { 3864 /* Generate code to move any arguments passed in registers into 3865 * the stack variable __va_argsave, 3866 * so we can reference it via pointers through va_arg(). 
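 * This assumes the Posix x86-64 calling convention, where up to six integer
 * registers (DI,SI,DX,CX,R8,R9) and eight XMM registers may carry arguments.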
3867 * struct __va_argsave_t { 3868 * size_t[6] regs; 3869 * real[8] fpregs; 3870 * uint offset_regs; 3871 * uint offset_fpregs; 3872 * void* stack_args; 3873 * void* reg_args; 3874 * } 3875 * The MOVAPS instructions seg fault if data is not aligned on 3876 * 16 bytes, so this gives us a nice check to ensure no mistakes. 3877 MOV voff+0*8[RBP],EDI 3878 MOV voff+1*8[RBP],ESI 3879 MOV voff+2*8[RBP],RDX 3880 MOV voff+3*8[RBP],RCX 3881 MOV voff+4*8[RBP],R8 3882 MOV voff+5*8[RBP],R9 3883 MOVZX EAX,AL // AL = 0..8, # of XMM registers used 3884 SHL EAX,2 // 4 bytes for each MOVAPS 3885 LEA R11,offset L2[RIP] 3886 SUB R11,RAX 3887 LEA RAX,voff+6*8+0x7F[RBP] 3888 JMP R11d 3889 MOVAPS -0x0F[RAX],XMM7 // only save XMM registers if actually used 3890 MOVAPS -0x1F[RAX],XMM6 3891 MOVAPS -0x2F[RAX],XMM5 3892 MOVAPS -0x3F[RAX],XMM4 3893 MOVAPS -0x4F[RAX],XMM3 3894 MOVAPS -0x5F[RAX],XMM2 3895 MOVAPS -0x6F[RAX],XMM1 3896 MOVAPS -0x7F[RAX],XMM0 3897 L2: 3898 MOV 1[RAX],offset_regs // set __va_argsave.offset_regs 3899 MOV 5[RAX],offset_fpregs // set __va_argsave.offset_fpregs 3900 LEA R11, Para.size+Para.offset[RBP] 3901 MOV 9[RAX],R11 // set __va_argsave.stack_args 3902 SUB RAX,6*8+0x7F // point to start of __va_argsave 3903 MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args 3904 * RAX and R11 are destroyed. 3905 */ 3906 3907 /* Save registers into the voff area on the stack 3908 */ 3909 targ_size_t voff = Auto.size + BPoff + sv.Soffset; // EBP offset of start of sv 3910 const int vregnum = 6; 3911 const uint vsize = vregnum * 8 + 8 * 16; 3912 3913 static immutable ubyte[vregnum] regs = [ DI,SI,DX,CX,R8,R9 ]; 3914 3915 if (!hasframe || enforcealign) 3916 voff += EBPtoESP; 3917 3918 for (int i = 0; i < vregnum; i++) 3919 { 3920 uint r = regs[i]; 3921 if (!(mask(r) & namedargs)) // unnamed arguments would be the ... ones 3922 { 3923 uint ea = (REX_W << 16) | modregxrm(2,r,BPRM); 3924 if (!hasframe || enforcealign) 3925 ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,r,4); 3926 cdb.genc1(0x89,ea,FLconst,voff + i*8); 3927 } 3928 } 3929 3930 genregs(cdb,MOVZXb,AX,AX); // MOVZX EAX,AL 3931 cdb.genc2(0xC1,modregrm(3,4,AX),2); // SHL EAX,2 3932 int raxoff = cast(int)(voff+6*8+0x7F); 3933 uint L2offset = (raxoff < -0x7F) ? 
0x2D : 0x2A; 3934 if (!hasframe || enforcealign) 3935 L2offset += 1; // +1 for sib byte 3936 // LEA R11,offset L2[RIP] 3937 cdb.genc1(LEA,(REX_W << 16) | modregxrm(0,R11,5),FLconst,L2offset); 3938 genregs(cdb,0x29,AX,R11); // SUB R11,RAX 3939 code_orrex(cdb.last(), REX_W); 3940 // LEA RAX,voff+vsize-6*8-16+0x7F[RBP] 3941 uint ea = (REX_W << 16) | modregrm(2,AX,BPRM); 3942 if (!hasframe || enforcealign) 3943 // add sib byte for [RSP] addressing 3944 ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,AX,4); 3945 cdb.genc1(LEA,ea,FLconst,raxoff); 3946 cdb.gen2(0xFF,modregrmx(3,4,R11)); // JMP R11d 3947 for (int i = 0; i < 8; i++) 3948 { 3949 // MOVAPS -15-16*i[RAX],XMM7-i 3950 cdb.genc1(0x0F29,modregrm(0,XMM7-i,0),FLconst,-15-16*i); 3951 } 3952 3953 /* Compute offset_regs and offset_fpregs 3954 */ 3955 uint offset_regs = 0; 3956 uint offset_fpregs = vregnum * 8; 3957 for (int i = AX; i <= XMM7; i++) 3958 { 3959 regm_t m = mask(i); 3960 if (m & namedargs) 3961 { 3962 if (m & (mDI|mSI|mDX|mCX|mR8|mR9)) 3963 offset_regs += 8; 3964 else if (m & XMMREGS) 3965 offset_fpregs += 16; 3966 namedargs &= ~m; 3967 if (!namedargs) 3968 break; 3969 } 3970 } 3971 // MOV 1[RAX],offset_regs 3972 cdb.genc(0xC7,modregrm(2,0,AX),FLconst,1,FLconst,offset_regs); 3973 3974 // MOV 5[RAX],offset_fpregs 3975 cdb.genc(0xC7,modregrm(2,0,AX),FLconst,5,FLconst,offset_fpregs); 3976 3977 // LEA R11, Para.size+Para.offset[RBP] 3978 ea = modregxrm(2,R11,BPRM); 3979 if (!hasframe) 3980 ea = (modregrm(0,4,SP) << 8) | modregrm(2,DX,4); 3981 Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1); 3982 cdb.genc1(LEA,(REX_W << 16) | ea,FLconst,Para.size + Para.offset); 3983 3984 // MOV 9[RAX],R11 3985 cdb.genc1(0x89,(REX_W << 16) | modregxrm(2,R11,AX),FLconst,9); 3986 3987 // SUB RAX,6*8+0x7F // point to start of __va_argsave 3988 cdb.genc2(0x2D,0,6*8+0x7F); 3989 code_orrex(cdb.last(), REX_W); 3990 3991 // MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args 3992 cdb.genc1(0x89,(REX_W << 16) | modregrm(2,AX,AX),FLconst,6*8+8*16+4+4+8); 3993 3994 pinholeopt(cdb.peek(), null); 3995 useregs(mAX|mR11); 3996 } 3997 3998 void prolog_gen_win64_varargs(ref CodeBuilder cdb) 3999 { 4000 /* The Microsoft scheme. 4001 * http://msdn.microsoft.com/en-US/library/dd2wa36c(v=vs.80) 4002 * Copy registers onto stack. 4003 mov 8[RSP],RCX 4004 mov 010h[RSP],RDX 4005 mov 018h[RSP],R8 4006 mov 020h[RSP],R9 4007 */ 4008 } 4009 4010 /************************************ 4011 * Params: 4012 * cdb = generated code sink 4013 * tf = what's the type of the function 4014 * pushalloc = use PUSH to allocate on the stack rather than subtracting from SP 4015 * namedargs = set to the registers that named parameters were passed in 4016 */ 4017 void prolog_loadparams(ref CodeBuilder cdb, tym_t tyf, bool pushalloc, out regm_t namedargs) 4018 { 4019 //printf("prolog_loadparams()\n"); 4020 debug 4021 for (SYMIDX si = 0; si < globsym.length; si++) 4022 { 4023 Symbol *s = globsym[si]; 4024 if (debugr && (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)) 4025 { 4026 printf("symbol '%s' is fastpar in register [l %s, m %s]\n", s.Sident.ptr, 4027 regm_str(mask(s.Spreg)), 4028 (s.Spreg2 == NOREG ? "NOREG" : regm_str(mask(s.Spreg2)))); 4029 if (s.Sfl == FLreg) 4030 printf("\tassigned to register %s\n", regm_str(mask(s.Sreglsw))); 4031 } 4032 } 4033 4034 uint pushallocreg = (tyf == TYmfunc) ? CX : AX; 4035 4036 /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were not assigned 4037 * registers into their stack locations. 
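 * Parameters that Symbol_Sisdead() reports as never referenced are skipped.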
4038 */ 4039 regm_t shadowregm = 0; 4040 for (SYMIDX si = 0; si < globsym.length; si++) 4041 { 4042 Symbol *s = globsym[si]; 4043 uint sz = cast(uint)type_size(s.Stype); 4044 4045 if (!((s.Sclass == SCfastpar || s.Sclass == SCshadowreg) && s.Sfl != FLreg)) 4046 continue; 4047 // Argument is passed in a register 4048 4049 type *t = s.Stype; 4050 type *t2 = null; 4051 4052 tym_t tyb = tybasic(t.Tty); 4053 4054 // This logic is same as FuncParamRegs_alloc function at src/dmd/backend/cod1.d 4055 // 4056 // Find suitable SROA based on the element type 4057 // (Don't put volatile parameters in registers) 4058 if (tyb == TYarray && !(t.Tty & mTYvolatile)) 4059 { 4060 type *targ1; 4061 argtypes(t, targ1, t2); 4062 if (targ1) 4063 t = targ1; 4064 } 4065 4066 // If struct just wraps another type 4067 if (tyb == TYstruct) 4068 { 4069 // On windows 64 bits, structs occupy a general purpose register, 4070 // regardless of the struct size or the number & types of its fields. 4071 if (config.exe != EX_WIN64) 4072 { 4073 type *targ1 = t.Ttag.Sstruct.Sarg1type; 4074 t2 = t.Ttag.Sstruct.Sarg2type; 4075 if (targ1) 4076 t = targ1; 4077 } 4078 } 4079 4080 if (Symbol_Sisdead(s, anyiasm)) 4081 { 4082 // Ignore it, as it is never referenced 4083 continue; 4084 } 4085 4086 targ_size_t offset = Fast.size + BPoff; 4087 if (s.Sclass == SCshadowreg) 4088 offset = Para.size; 4089 offset += s.Soffset; 4090 if (!hasframe || (enforcealign && s.Sclass != SCshadowreg)) 4091 offset += EBPtoESP; 4092 4093 reg_t preg = s.Spreg; 4094 foreach (i; 0 .. 2) // twice, once for each possible parameter register 4095 { 4096 shadowregm |= mask(preg); 4097 opcode_t op = 0x89; // MOV x[EBP],preg 4098 if (isXMMreg(preg)) 4099 op = xmmstore((t.Tty & TYarray) && t.Tnext ? t.Tnext.Tty : t.Tty); 4100 if (!(pushalloc && preg == pushallocreg) || s.Sclass == SCshadowreg) 4101 { 4102 if (hasframe && (!enforcealign || s.Sclass == SCshadowreg)) 4103 { 4104 // MOV x[EBP],preg 4105 cdb.genc1(op,modregxrm(2,preg,BPRM),FLconst,offset); 4106 if (isXMMreg(preg)) 4107 { 4108 checkSetVex(cdb.last(), t.Tty); 4109 } 4110 else 4111 { 4112 //printf("%s Fast.size = %d, BPoff = %d, Soffset = %d, sz = %d\n", 4113 // s.Sident, (int)Fast.size, (int)BPoff, (int)s.Soffset, (int)sz); 4114 if (I64 && sz > 4) 4115 code_orrex(cdb.last(), REX_W); 4116 } 4117 } 4118 else 4119 { 4120 // MOV offset[ESP],preg 4121 // BUG: byte size? 4122 cdb.genc1(op, 4123 (modregrm(0,4,SP) << 8) | 4124 modregxrm(2,preg,4),FLconst,offset); 4125 if (isXMMreg(preg)) 4126 { 4127 checkSetVex(cdb.last(), t.Tty); 4128 } 4129 else 4130 { 4131 if (I64 && sz > 4) 4132 cdb.last().Irex |= REX_W; 4133 } 4134 } 4135 } 4136 preg = s.Spreg2; 4137 if (preg == NOREG) 4138 break; 4139 if (t2) 4140 t = t2; 4141 offset += REGSIZE; 4142 } 4143 } 4144 4145 if (config.exe == EX_WIN64 && variadic(funcsym_p.Stype)) 4146 { 4147 /* The Microsoft scheme. 4148 * http://msdn.microsoft.com/en-US/library/dd2wa36c(v=vs.80) 4149 * Copy registers onto stack. 
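 * Only the slots whose register (or its XMM counterpart) was not already
 * stored above, as tracked in shadowregm, are written: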
4150 mov 8[RSP],RCX or XMM0 4151 mov 010h[RSP],RDX or XMM1 4152 mov 018h[RSP],R8 or XMM2 4153 mov 020h[RSP],R9 or XMM3 4154 */ 4155 static immutable reg_t[4] vregs = [ CX,DX,R8,R9 ]; 4156 for (int i = 0; i < vregs.length; ++i) 4157 { 4158 uint preg = vregs[i]; 4159 uint offset = cast(uint)(Para.size + i * REGSIZE); 4160 if (!(shadowregm & (mask(preg) | mask(XMM0 + i)))) 4161 { 4162 if (hasframe) 4163 { 4164 // MOV x[EBP],preg 4165 cdb.genc1(0x89, 4166 modregxrm(2,preg,BPRM),FLconst, offset); 4167 code_orrex(cdb.last(), REX_W); 4168 } 4169 else 4170 { 4171 // MOV offset[ESP],preg 4172 cdb.genc1(0x89, 4173 (modregrm(0,4,SP) << 8) | 4174 modregxrm(2,preg,4),FLconst,offset + EBPtoESP); 4175 } 4176 cdb.last().Irex |= REX_W; 4177 } 4178 } 4179 } 4180 4181 /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were assigned registers 4182 * into their assigned registers. 4183 * Note that we have a big problem if Pa is passed in R1 and assigned to R2, 4184 * and Pb is passed in R2 but assigned to R1. Detect it and assert. 4185 */ 4186 regm_t assignregs = 0; 4187 for (SYMIDX si = 0; si < globsym.length; si++) 4188 { 4189 Symbol *s = globsym[si]; 4190 uint sz = cast(uint)type_size(s.Stype); 4191 4192 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 4193 namedargs |= s.Spregm(); 4194 4195 if (!((s.Sclass == SCfastpar || s.Sclass == SCshadowreg) && s.Sfl == FLreg)) 4196 { 4197 // Argument is passed in a register 4198 continue; 4199 } 4200 4201 type *t = s.Stype; 4202 type *t2 = null; 4203 if (tybasic(t.Tty) == TYstruct && config.exe != EX_WIN64) 4204 { type *targ1 = t.Ttag.Sstruct.Sarg1type; 4205 t2 = t.Ttag.Sstruct.Sarg2type; 4206 if (targ1) 4207 t = targ1; 4208 } 4209 4210 reg_t preg = s.Spreg; 4211 reg_t r = s.Sreglsw; 4212 for (int i = 0; i < 2; ++i) 4213 { 4214 if (preg == NOREG) 4215 break; 4216 assert(!(mask(preg) & assignregs)); // not already stepped on 4217 assignregs |= mask(r); 4218 4219 // MOV reg,preg 4220 if (r == preg) 4221 { 4222 } 4223 else if (mask(preg) & XMMREGS) 4224 { 4225 const op = xmmload(t.Tty); // MOVSS/D xreg,preg 4226 uint xreg = r - XMM0; 4227 cdb.gen2(op,modregxrmx(3,xreg,preg - XMM0)); 4228 } 4229 else 4230 { 4231 //printf("test1 mov %s, %s\n", regstring[r], regstring[preg]); 4232 genmovreg(cdb,r,preg); 4233 if (I64 && sz == 8) 4234 code_orrex(cdb.last(), REX_W); 4235 } 4236 preg = s.Spreg2; 4237 r = s.Sregmsw; 4238 if (t2) 4239 t = t2; 4240 } 4241 } 4242 4243 /* For parameters that were passed on the stack, but are enregistered, 4244 * initialize the registers with the parameter stack values. 4245 * Do not use assignaddr(), as it will replace the stack reference with 4246 * the register. 
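 * (When there is no frame, the load below is converted to an ESP-relative
 * address by adding EBPtoESP.)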
4247 */ 4248 for (SYMIDX si = 0; si < globsym.length; si++) 4249 { 4250 Symbol *s = globsym[si]; 4251 uint sz = cast(uint)type_size(s.Stype); 4252 4253 if (!((s.Sclass == SCregpar || s.Sclass == SCparameter) && 4254 s.Sfl == FLreg && 4255 (refparam 4256 // This variable has been reference by a nested function 4257 || MARS && s.Stype.Tty & mTYvolatile 4258 ))) 4259 { 4260 continue; 4261 } 4262 // MOV reg,param[BP] 4263 //assert(refparam); 4264 if (mask(s.Sreglsw) & XMMREGS) 4265 { 4266 const op = xmmload(s.Stype.Tty); // MOVSS/D xreg,mem 4267 uint xreg = s.Sreglsw - XMM0; 4268 cdb.genc1(op,modregxrm(2,xreg,BPRM),FLconst,Para.size + s.Soffset); 4269 if (!hasframe) 4270 { // Convert to ESP relative address rather than EBP 4271 code *c = cdb.last(); 4272 c.Irm = cast(ubyte)modregxrm(2,xreg,4); 4273 c.Isib = modregrm(0,4,SP); 4274 c.IEV1.Vpointer += EBPtoESP; 4275 } 4276 continue; 4277 } 4278 4279 cdb.genc1(sz == 1 ? 0x8A : 0x8B, 4280 modregxrm(2,s.Sreglsw,BPRM),FLconst,Para.size + s.Soffset); 4281 code *c = cdb.last(); 4282 if (!I16 && sz == SHORTSIZE) 4283 c.Iflags |= CFopsize; // operand size 4284 if (I64 && sz >= REGSIZE) 4285 c.Irex |= REX_W; 4286 if (I64 && sz == 1 && s.Sreglsw >= 4) 4287 c.Irex |= REX; 4288 if (!hasframe) 4289 { // Convert to ESP relative address rather than EBP 4290 assert(!I16); 4291 c.Irm = cast(ubyte)modregxrm(2,s.Sreglsw,4); 4292 c.Isib = modregrm(0,4,SP); 4293 c.IEV1.Vpointer += EBPtoESP; 4294 } 4295 if (sz > REGSIZE) 4296 { 4297 cdb.genc1(0x8B, 4298 modregxrm(2,s.Sregmsw,BPRM),FLconst,Para.size + s.Soffset + REGSIZE); 4299 code *cx = cdb.last(); 4300 if (I64) 4301 cx.Irex |= REX_W; 4302 if (!hasframe) 4303 { // Convert to ESP relative address rather than EBP 4304 assert(!I16); 4305 cx.Irm = cast(ubyte)modregxrm(2,s.Sregmsw,4); 4306 cx.Isib = modregrm(0,4,SP); 4307 cx.IEV1.Vpointer += EBPtoESP; 4308 } 4309 } 4310 } 4311 } 4312 4313 /******************************* 4314 * Generate and return function epilog. 4315 * Output: 4316 * retsize Size of function epilog 4317 */ 4318 4319 void epilog(block *b) 4320 { 4321 code *cpopds; 4322 reg_t reg; 4323 reg_t regx; // register that's not a return reg 4324 regm_t topop,regm; 4325 targ_size_t xlocalsize = localsize; 4326 4327 CodeBuilder cdbx; cdbx.ctor(); 4328 tym_t tyf = funcsym_p.ty(); 4329 tym_t tym = tybasic(tyf); 4330 bool farfunc = tyfarfunc(tym) != 0; 4331 if (!(b.Bflags & BFLepilog)) // if no epilog code 4332 goto Lret; // just generate RET 4333 regx = (b.BC == BCret) ? AX : CX; 4334 4335 retsize = 0; 4336 4337 if (tyf & mTYnaked) // if no prolog/epilog 4338 return; 4339 4340 if (tym == TYifunc) 4341 { 4342 static immutable ubyte[5] ops2 = [ 0x07,0x1F,0x61,0xCF,0 ]; 4343 static immutable ubyte[12] ops0 = [ 0x07,0x1F,0x5F,0x5E, 4344 0x5D,0x5B,0x5B,0x5A, 4345 0x59,0x58,0xCF,0 ]; 4346 4347 genregs(cdbx,0x8B,SP,BP); // MOV SP,BP 4348 auto p = (config.target_cpu >= TARGET_80286) ? ops2.ptr : ops0.ptr; 4349 do 4350 cdbx.gen1(*p); 4351 while (*++p); 4352 goto Lopt; 4353 } 4354 4355 if (config.flags & CFGtrace && 4356 (!(config.flags4 & CFG4allcomdat) || 4357 funcsym_p.Sclass == SCcomdat || 4358 funcsym_p.Sclass == SCglobal || 4359 (config.flags2 & CFG2comdat && SymInline(funcsym_p)) 4360 ) 4361 ) 4362 { 4363 Symbol *s = getRtlsym(farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N); 4364 makeitextern(s); 4365 cdbx.gencs(I16 ? 
0x9A : CALL,0,FLfunc,s); // CALLF _trace 4366 if (!I16) 4367 code_orflag(cdbx.last(),CFoff | CFselfrel); 4368 useregs((ALLREGS | mBP | mES) & ~s.Sregsaved); 4369 } 4370 4371 if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.exe == EX_WIN32 || MARS)) 4372 { 4373 nteh_epilog(cdbx); 4374 } 4375 4376 cpopds = null; 4377 if (tyf & mTYloadds) 4378 { 4379 cdbx.gen1(0x1F); // POP DS 4380 cpopds = cdbx.last(); 4381 } 4382 4383 /* Pop all the general purpose registers saved on the stack 4384 * by the prolog code. Remember to do them in the reverse 4385 * order they were pushed. 4386 */ 4387 topop = fregsaved & ~mfuncreg; 4388 epilog_restoreregs(cdbx, topop); 4389 4390 version (MARS) 4391 { 4392 if (usednteh & NTEHjmonitor) 4393 { 4394 regm_t retregs = 0; 4395 if (b.BC == BCretexp) 4396 retregs = regmask(b.Belem.Ety, tym); 4397 nteh_monitor_epilog(cdbx,retregs); 4398 xlocalsize += 8; 4399 } 4400 } 4401 4402 if (config.wflags & WFwindows && farfunc) 4403 { 4404 int wflags = config.wflags; 4405 if (wflags & WFreduced && !(tyf & mTYexport)) 4406 { // reduced prolog/epilog for non-exported functions 4407 wflags &= ~(WFdgroup | WFds | WFss); 4408 if (!(wflags & WFsaveds)) 4409 goto L4; 4410 } 4411 4412 if (localsize) 4413 { 4414 cdbx.genc1(LEA,modregrm(1,SP,6),FLconst,cast(targ_uns)-2); /* LEA SP,-2[BP] */ 4415 } 4416 if (wflags & (WFsaveds | WFds | WFss | WFdgroup)) 4417 { 4418 if (cpopds) 4419 cpopds.Iop = NOP; // don't need previous one 4420 cdbx.gen1(0x1F); // POP DS 4421 } 4422 cdbx.gen1(0x58 + BP); // POP BP 4423 if (config.wflags & WFincbp) 4424 cdbx.gen1(0x48 + BP); // DEC BP 4425 assert(hasframe); 4426 } 4427 else 4428 { 4429 if (needframe || (xlocalsize && hasframe)) 4430 { 4431 L4: 4432 assert(hasframe); 4433 if (xlocalsize || enforcealign) 4434 { 4435 if (config.flags2 & CFG2stomp) 4436 { /* MOV ECX,0xBEAF 4437 * L1: 4438 * MOV [ESP],ECX 4439 * ADD ESP,4 4440 * CMP EBP,ESP 4441 * JNE L1 4442 * POP EBP 4443 */ 4444 /* Value should be: 4445 * 1. != 0 (code checks for null pointers) 4446 * 2. be odd (to mess up alignment) 4447 * 3. fall in first 64K (likely marked as inaccessible) 4448 * 4. be a value that stands out in the debugger 4449 */ 4450 assert(I32 || I64); 4451 targ_size_t value = 0x0000BEAF; 4452 reg_t regcx = CX; 4453 mfuncreg &= ~mask(regcx); 4454 uint grex = I64 ? 
REX_W << 16 : 0; 4455 cdbx.genc2(0xC7,grex | modregrmx(3,0,regcx),value); // MOV regcx,value 4456 cdbx.gen2sib(0x89,grex | modregrm(0,regcx,4),modregrm(0,4,SP)); // MOV [ESP],regcx 4457 code *c1 = cdbx.last(); 4458 cdbx.genc2(0x81,grex | modregrm(3,0,SP),REGSIZE); // ADD ESP,REGSIZE 4459 genregs(cdbx,0x39,SP,BP); // CMP EBP,ESP 4460 if (I64) 4461 code_orrex(cdbx.last(),REX_W); 4462 genjmp(cdbx,JNE,FLcode,cast(block *)c1); // JNE L1 4463 // explicitly mark as short jump, needed for correct retsize calculation (Bugzilla 15779) 4464 cdbx.last().Iflags &= ~CFjmp16; 4465 cdbx.gen1(0x58 + BP); // POP BP 4466 } 4467 else if (config.exe == EX_WIN64) 4468 { // See http://msdn.microsoft.com/en-us/library/tawsa7cb(v=vs.80).aspx 4469 // LEA RSP,0[RBP] 4470 cdbx.genc1(LEA,(REX_W<<16)|modregrm(2,SP,BPRM),FLconst,0); 4471 cdbx.gen1(0x58 + BP); // POP RBP 4472 } 4473 else if (config.target_cpu >= TARGET_80286 && 4474 !(config.target_cpu >= TARGET_80386 && config.flags4 & CFG4speed) 4475 ) 4476 cdbx.gen1(LEAVE); // LEAVE 4477 else if (0 && xlocalsize == REGSIZE && Alloca.size == 0 && I32) 4478 { // This doesn't work - I should figure out why 4479 mfuncreg &= ~mask(regx); 4480 cdbx.gen1(0x58 + regx); // POP regx 4481 cdbx.gen1(0x58 + BP); // POP BP 4482 } 4483 else 4484 { 4485 genregs(cdbx,0x8B,SP,BP); // MOV SP,BP 4486 if (I64) 4487 code_orrex(cdbx.last(), REX_W); // MOV RSP,RBP 4488 cdbx.gen1(0x58 + BP); // POP BP 4489 } 4490 } 4491 else 4492 cdbx.gen1(0x58 + BP); // POP BP 4493 if (config.wflags & WFincbp && farfunc) 4494 cdbx.gen1(0x48 + BP); // DEC BP 4495 } 4496 else if (xlocalsize == REGSIZE && (!I16 || b.BC == BCret)) 4497 { 4498 mfuncreg &= ~mask(regx); 4499 cdbx.gen1(0x58 + regx); // POP regx 4500 } 4501 else if (xlocalsize) 4502 cod3_stackadj(cdbx, cast(int)-xlocalsize); 4503 } 4504 if (b.BC == BCret || b.BC == BCretexp) 4505 { 4506 Lret: 4507 opcode_t op = tyfarfunc(tym) ? 0xCA : 0xC2; 4508 if (tym == TYhfunc) 4509 { 4510 cdbx.genc2(0xC2,0,4); // RET 4 4511 } 4512 else if (!typfunc(tym) || // if caller cleans the stack 4513 config.exe == EX_WIN64 || 4514 Para.offset == 0) // or nothing pushed on the stack anyway 4515 { 4516 op++; // to a regular RET 4517 cdbx.gen1(op); 4518 } 4519 else 4520 { // Stack is always aligned on register size boundary 4521 Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1); 4522 if (Para.offset >= 0x10000) 4523 { 4524 /* 4525 POP REG 4526 ADD ESP, Para.offset 4527 JMP REG 4528 */ 4529 cdbx.gen1(0x58+regx); 4530 cdbx.genc2(0x81, modregrm(3,0,SP), Para.offset); 4531 if (I64) 4532 code_orrex(cdbx.last(), REX_W); 4533 cdbx.genc2(0xFF, modregrm(3,4,regx), 0); 4534 if (I64) 4535 code_orrex(cdbx.last(), REX_W); 4536 } 4537 else 4538 cdbx.genc2(op,0,Para.offset); // RET Para.offset 4539 } 4540 } 4541 4542 Lopt: 4543 // If last instruction in ce is ADD SP,imm, and first instruction 4544 // in c sets SP, we can dump the ADD. 
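    // Illustrative sketch of that optimization (cr = tail of b.Bcode,
    // c = first instruction of the epilog built above in cdbx):
    //      ADD ESP,imm         ; cr
    //      MOV ESP,EBP         ; c   (or LEAVE, or LEA SP,-2[BP])
    // The ADD is redundant because c re-establishes ESP, so cr becomes a NOP.
    // If c is POP EBP instead, cr is rewritten as MOV ESP,EBP so the POP
    // still pops the saved EBP rather than whatever the ADD skipped past.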
4545 CodeBuilder cdb; cdb.ctor(); 4546 cdb.append(b.Bcode); 4547 code *cr = cdb.last(); 4548 code *c = cdbx.peek(); 4549 if (cr && c && !I64) 4550 { 4551 if (cr.Iop == 0x81 && cr.Irm == modregrm(3,0,SP)) // if ADD SP,imm 4552 { 4553 if ( 4554 c.Iop == LEAVE || // LEAVE 4555 (c.Iop == 0x8B && c.Irm == modregrm(3,SP,BP)) || // MOV SP,BP 4556 (c.Iop == LEA && c.Irm == modregrm(1,SP,6)) // LEA SP,-imm[BP] 4557 ) 4558 cr.Iop = NOP; 4559 else if (c.Iop == 0x58 + BP) // if POP BP 4560 { 4561 cr.Iop = 0x8B; 4562 cr.Irm = modregrm(3,SP,BP); // MOV SP,BP 4563 } 4564 } 4565 else 4566 { 4567 static if (0) 4568 { 4569 // These optimizations don't work if the called function 4570 // cleans off the stack. 4571 if (c.Iop == 0xC3 && cr.Iop == CALL) // CALL near 4572 { 4573 cr.Iop = 0xE9; // JMP near 4574 c.Iop = NOP; 4575 } 4576 else if (c.Iop == 0xCB && cr.Iop == 0x9A) // CALL far 4577 { 4578 cr.Iop = 0xEA; // JMP far 4579 c.Iop = NOP; 4580 } 4581 } 4582 } 4583 } 4584 4585 pinholeopt(c, null); 4586 retsize += calcblksize(c); // compute size of function epilog 4587 cdb.append(cdbx); 4588 b.Bcode = cdb.finish(); 4589 } 4590 4591 /******************************* 4592 * Return offset of SP from BP. 4593 */ 4594 4595 targ_size_t cod3_spoff() 4596 { 4597 //printf("spoff = x%x, localsize = x%x\n", (int)spoff, (int)localsize); 4598 return spoff + localsize; 4599 } 4600 4601 void gen_spill_reg(ref CodeBuilder cdb, Symbol* s, bool toreg) 4602 { 4603 code cs; 4604 const regm_t keepmsk = toreg ? RMload : RMstore; 4605 4606 elem* e = el_var(s); // so we can trick getlvalue() into working for us 4607 4608 if (mask(s.Sreglsw) & XMMREGS) 4609 { // Convert to save/restore of XMM register 4610 if (toreg) 4611 cs.Iop = xmmload(s.Stype.Tty); // MOVSS/D xreg,mem 4612 else 4613 cs.Iop = xmmstore(s.Stype.Tty); // MOVSS/D mem,xreg 4614 getlvalue(cdb,&cs,e,keepmsk); 4615 cs.orReg(s.Sreglsw - XMM0); 4616 cdb.gen(&cs); 4617 } 4618 else 4619 { 4620 const int sz = cast(int)type_size(s.Stype); 4621 cs.Iop = toreg ? 0x8B : 0x89; // MOV reg,mem[ESP] : MOV mem[ESP],reg 4622 cs.Iop ^= (sz == 1); 4623 getlvalue(cdb,&cs,e,keepmsk); 4624 cs.orReg(s.Sreglsw); 4625 if (I64 && sz == 1 && s.Sreglsw >= 4) 4626 cs.Irex |= REX; 4627 if ((cs.Irm & 0xC0) == 0xC0 && // reg,reg 4628 (((cs.Irm >> 3) ^ cs.Irm) & 7) == 0 && // registers match 4629 (((cs.Irex >> 2) ^ cs.Irex) & 1) == 0) // REX_R and REX_B match 4630 { } // skip MOV reg,reg 4631 else 4632 cdb.gen(&cs); 4633 if (sz > REGSIZE) 4634 { 4635 cs.setReg(s.Sregmsw); 4636 getlvalue_msw(&cs); 4637 if ((cs.Irm & 0xC0) == 0xC0 && // reg,reg 4638 (((cs.Irm >> 3) ^ cs.Irm) & 7) == 0 && // registers match 4639 (((cs.Irex >> 2) ^ cs.Irex) & 1) == 0) // REX_R and REX_B match 4640 { } // skip MOV reg,reg 4641 else 4642 cdb.gen(&cs); 4643 } 4644 } 4645 4646 el_free(e); 4647 } 4648 4649 /**************************** 4650 * Generate code for, and output a thunk. 4651 * Params: 4652 * sthunk = Symbol of thunk 4653 * sfunc = Symbol of thunk's target function 4654 * thisty = Type of this pointer 4655 * p = ESP parameter offset to this pointer 4656 * d = offset to add to 'this' pointer 4657 * d2 = offset from 'this' to vptr 4658 * i = offset into vtbl[] 4659 */ 4660 4661 void cod3_thunk(Symbol *sthunk,Symbol *sfunc,uint p,tym_t thisty, 4662 uint d,int i,uint d2) 4663 { 4664 targ_size_t thunkoffset; 4665 4666 int seg = sthunk.Sseg; 4667 cod3_align(seg); 4668 4669 // Skip over return address 4670 tym_t thunkty = tybasic(sthunk.ty()); 4671 if (tyfarfunc(thunkty)) 4672 p += I32 ? 
8 : tysize(TYfptr); // far function 4673 else 4674 p += tysize(TYnptr); 4675 if (tybasic(sfunc.ty()) == TYhfunc) 4676 p += tysize(TYnptr); // skip over hidden pointer 4677 4678 CodeBuilder cdb; cdb.ctor(); 4679 if (!I16) 4680 { 4681 /* 4682 Generate: 4683 ADD p[ESP],d 4684 For direct call: 4685 JMP sfunc 4686 For virtual call: 4687 MOV EAX, p[ESP] EAX = this 4688 MOV EAX, d2[EAX] EAX = this.vptr 4689 JMP i[EAX] jump to virtual function 4690 */ 4691 reg_t reg = 0; 4692 if (cast(int)d < 0) 4693 { 4694 d = -d; 4695 reg = 5; // switch from ADD to SUB 4696 } 4697 if (thunkty == TYmfunc) 4698 { // ADD ECX,d 4699 if (d) 4700 cdb.genc2(0x81,modregrm(3,reg,CX),d); 4701 } 4702 else if (thunkty == TYjfunc || (I64 && thunkty == TYnfunc)) 4703 { // ADD EAX,d 4704 int rm = AX; 4705 if (config.exe == EX_WIN64) 4706 rm = CX; 4707 else if (I64) 4708 rm = (thunkty == TYnfunc && (sfunc.Sfunc.Fflags3 & F3hiddenPtr)) ? SI : DI; 4709 if (d) 4710 cdb.genc2(0x81,modregrm(3,reg,rm),d); 4711 } 4712 else 4713 { 4714 cdb.genc(0x81,modregrm(2,reg,4), 4715 FLconst,p, // to this 4716 FLconst,d); // ADD p[ESP],d 4717 cdb.last().Isib = modregrm(0,4,SP); 4718 } 4719 if (I64 && cdb.peek()) 4720 cdb.last().Irex |= REX_W; 4721 } 4722 else 4723 { 4724 /* 4725 Generate: 4726 MOV BX,SP 4727 ADD [SS:] p[BX],d 4728 For direct call: 4729 JMP sfunc 4730 For virtual call: 4731 MOV BX, p[BX] BX = this 4732 MOV BX, d2[BX] BX = this.vptr 4733 JMP i[BX] jump to virtual function 4734 */ 4735 4736 genregs(cdb,0x89,SP,BX); // MOV BX,SP 4737 cdb.genc(0x81,modregrm(2,0,7), 4738 FLconst,p, // to this 4739 FLconst,d); // ADD p[BX],d 4740 if (config.wflags & WFssneds || 4741 // If DS needs reloading from SS, 4742 // then assume SS != DS on thunk entry 4743 (LARGEDATA && config.wflags & WFss)) 4744 cdb.last().Iflags |= CFss; // SS: 4745 } 4746 4747 if ((i & 0xFFFF) != 0xFFFF) // if virtual call 4748 { 4749 const bool FARTHIS = (tysize(thisty) > REGSIZE); 4750 const bool FARVPTR = FARTHIS; 4751 4752 assert(thisty != TYvptr); // can't handle this case 4753 4754 if (!I16) 4755 { 4756 assert(!FARTHIS && !LARGECODE); 4757 if (thunkty == TYmfunc) // if 'this' is in ECX 4758 { 4759 // MOV EAX,d2[ECX] 4760 cdb.genc1(0x8B,modregrm(2,AX,CX),FLconst,d2); 4761 } 4762 else if (thunkty == TYjfunc) // if 'this' is in EAX 4763 { 4764 // MOV EAX,d2[EAX] 4765 cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2); 4766 } 4767 else 4768 { 4769 // MOV EAX,p[ESP] 4770 cdb.genc1(0x8B,(modregrm(0,4,SP) << 8) | modregrm(2,AX,4),FLconst,cast(targ_uns) p); 4771 if (I64) 4772 cdb.last().Irex |= REX_W; 4773 4774 // MOV EAX,d2[EAX] 4775 cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2); 4776 } 4777 if (I64) 4778 code_orrex(cdb.last(), REX_W); 4779 // JMP i[EAX] 4780 cdb.genc1(0xFF,modregrm(2,4,0),FLconst,cast(targ_uns) i); 4781 } 4782 else 4783 { 4784 // MOV/LES BX,[SS:] p[BX] 4785 cdb.genc1((FARTHIS ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,cast(targ_uns) p); 4786 if (config.wflags & WFssneds || 4787 // If DS needs reloading from SS, 4788 // then assume SS != DS on thunk entry 4789 (LARGEDATA && config.wflags & WFss)) 4790 cdb.last().Iflags |= CFss; // SS: 4791 4792 // MOV/LES BX,[ES:]d2[BX] 4793 cdb.genc1((FARVPTR ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,d2); 4794 if (FARTHIS) 4795 cdb.last().Iflags |= CFes; // ES: 4796 4797 // JMP i[BX] 4798 cdb.genc1(0xFF,modregrm(2,(LARGECODE ? 
5 : 4),7),FLconst,cast(targ_uns) i); 4799 if (FARVPTR) 4800 cdb.last().Iflags |= CFes; // ES: 4801 } 4802 } 4803 else 4804 { 4805 static if (0) 4806 { 4807 localgot = null; // no local variables 4808 code *c1 = load_localgot(); 4809 if (c1) 4810 { 4811 assignaddrc(c1); 4812 cdb.append(c1); 4813 } 4814 } 4815 cdb.gencs((LARGECODE ? 0xEA : 0xE9),0,FLfunc,sfunc); // JMP sfunc 4816 cdb.last().Iflags |= LARGECODE ? (CFseg | CFoff) : (CFselfrel | CFoff); 4817 } 4818 4819 thunkoffset = Offset(seg); 4820 code *c = cdb.finish(); 4821 pinholeopt(c,null); 4822 codout(seg,c); 4823 code_free(c); 4824 4825 sthunk.Soffset = thunkoffset; 4826 sthunk.Ssize = Offset(seg) - thunkoffset; // size of thunk 4827 sthunk.Sseg = seg; 4828 if (config.exe & EX_posix || 4829 config.objfmt == OBJ_MSCOFF) 4830 { 4831 objmod.pubdef(seg,sthunk,sthunk.Soffset); 4832 } 4833 searchfixlist(sthunk); // resolve forward refs 4834 } 4835 4836 /***************************** 4837 * Assume symbol s is extern. 4838 */ 4839 4840 void makeitextern(Symbol *s) 4841 { 4842 if (s.Sxtrnnum == 0) 4843 { 4844 s.Sclass = SCextern; /* external */ 4845 /*printf("makeitextern(x%x)\n",s);*/ 4846 objmod.external(s); 4847 } 4848 } 4849 4850 4851 /******************************* 4852 * Replace JMPs in Bgotocode with JMP SHORTs whereever possible. 4853 * This routine depends on FLcode jumps to only be forward 4854 * referenced. 4855 * BFLjmpoptdone is set to true if nothing more can be done 4856 * with this block. 4857 * Input: 4858 * flag !=0 means don't have correct Boffsets yet 4859 * Returns: 4860 * number of bytes saved 4861 */ 4862 4863 int branch(block *bl,int flag) 4864 { 4865 int bytesaved; 4866 code* c,cn,ct; 4867 targ_size_t offset,disp; 4868 targ_size_t csize; 4869 4870 if (!flag) 4871 bl.Bflags |= BFLjmpoptdone; // assume this will be all 4872 c = bl.Bcode; 4873 if (!c) 4874 return 0; 4875 bytesaved = 0; 4876 offset = bl.Boffset; /* offset of start of block */ 4877 while (1) 4878 { 4879 ubyte op; 4880 4881 csize = calccodsize(c); 4882 cn = code_next(c); 4883 op = cast(ubyte)c.Iop; 4884 if ((op & ~0x0F) == 0x70 && c.Iflags & CFjmp16 || 4885 (op == JMP && !(c.Iflags & CFjmp5))) 4886 { 4887 L1: 4888 switch (c.IFL2) 4889 { 4890 case FLblock: 4891 if (flag) // no offsets yet, don't optimize 4892 goto L3; 4893 disp = c.IEV2.Vblock.Boffset - offset - csize; 4894 4895 /* If this is a forward branch, and there is an aligned 4896 * block intervening, it is possible that shrinking 4897 * the jump instruction will cause it to be out of 4898 * range of the target. This happens if the alignment 4899 * prevents the target block from moving correspondingly 4900 * closer. 4901 */ 4902 if (disp >= 0x7F-4 && c.IEV2.Vblock.Boffset > offset) 4903 { /* Look for intervening alignment 4904 */ 4905 for (block *b = bl.Bnext; b; b = b.Bnext) 4906 { 4907 if (b.Balign) 4908 { 4909 bl.Bflags &= ~BFLjmpoptdone; // some JMPs left 4910 goto L3; 4911 } 4912 if (b == c.IEV2.Vblock) 4913 break; 4914 } 4915 } 4916 4917 break; 4918 4919 case FLcode: 4920 { 4921 code *cr; 4922 4923 disp = 0; 4924 4925 ct = c.IEV2.Vcode; /* target of branch */ 4926 assert(ct.Iflags & (CFtarg | CFtarg2)); 4927 for (cr = cn; cr; cr = code_next(cr)) 4928 { 4929 if (cr == ct) 4930 break; 4931 disp += calccodsize(cr); 4932 } 4933 4934 if (!cr) 4935 { // Didn't find it in forward search. 
Try backwards jump 4936 int s = 0; 4937 disp = 0; 4938 for (cr = bl.Bcode; cr != cn; cr = code_next(cr)) 4939 { 4940 assert(cr != null); // must have found it 4941 if (cr == ct) 4942 s = 1; 4943 if (s) 4944 disp += calccodsize(cr); 4945 } 4946 } 4947 4948 if (config.flags4 & CFG4optimized && !flag) 4949 { 4950 /* Propagate branch forward past junk */ 4951 while (1) 4952 { 4953 if (ct.Iop == NOP || 4954 ct.Iop == (ESCAPE | ESClinnum)) 4955 { 4956 ct = code_next(ct); 4957 if (!ct) 4958 goto L2; 4959 } 4960 else 4961 { 4962 c.IEV2.Vcode = ct; 4963 ct.Iflags |= CFtarg; 4964 break; 4965 } 4966 } 4967 4968 /* And eliminate jmps to jmps */ 4969 if ((op == ct.Iop || ct.Iop == JMP) && 4970 (op == JMP || c.Iflags & CFjmp16)) 4971 { 4972 c.IFL2 = ct.IFL2; 4973 c.IEV2.Vcode = ct.IEV2.Vcode; 4974 /*printf("eliminating branch\n");*/ 4975 goto L1; 4976 } 4977 L2: 4978 { } 4979 } 4980 } 4981 break; 4982 4983 default: 4984 goto L3; 4985 } 4986 4987 if (disp == 0) // bra to next instruction 4988 { 4989 bytesaved += csize; 4990 c.Iop = NOP; // del branch instruction 4991 c.IEV2.Vcode = null; 4992 c = cn; 4993 if (!c) 4994 break; 4995 continue; 4996 } 4997 else if (cast(targ_size_t)cast(targ_schar)(disp - 2) == (disp - 2) && 4998 cast(targ_size_t)cast(targ_schar)disp == disp) 4999 { 5000 if (op == JMP) 5001 { 5002 c.Iop = JMPS; // JMP SHORT 5003 bytesaved += I16 ? 1 : 3; 5004 } 5005 else // else Jcond 5006 { 5007 c.Iflags &= ~CFjmp16; // a branch is ok 5008 bytesaved += I16 ? 3 : 4; 5009 5010 // Replace a cond jump around a call to a function that 5011 // never returns with a cond jump to that function. 5012 if (config.flags4 & CFG4optimized && 5013 config.target_cpu >= TARGET_80386 && 5014 disp == (I16 ? 3 : 5) && 5015 cn && 5016 cn.Iop == CALL && 5017 cn.IFL2 == FLfunc && 5018 cn.IEV2.Vsym.Sflags & SFLexit && 5019 !(cn.Iflags & (CFtarg | CFtarg2)) 5020 ) 5021 { 5022 cn.Iop = 0x0F00 | ((c.Iop & 0x0F) ^ 0x81); 5023 c.Iop = NOP; 5024 c.IEV2.Vcode = null; 5025 bytesaved++; 5026 5027 // If nobody else points to ct, we can remove the CFtarg 5028 if (flag && ct) 5029 { 5030 code *cx; 5031 for (cx = bl.Bcode; 1; cx = code_next(cx)) 5032 { 5033 if (!cx) 5034 { 5035 ct.Iflags &= ~CFtarg; 5036 break; 5037 } 5038 if (cx.IEV2.Vcode == ct) 5039 break; 5040 } 5041 } 5042 } 5043 } 5044 csize = calccodsize(c); 5045 } 5046 else 5047 bl.Bflags &= ~BFLjmpoptdone; // some JMPs left 5048 } 5049 L3: 5050 if (cn) 5051 { 5052 offset += csize; 5053 c = cn; 5054 } 5055 else 5056 break; 5057 } 5058 //printf("bytesaved = x%x\n",bytesaved); 5059 return bytesaved; 5060 } 5061 5062 5063 /************************************************ 5064 * Adjust all Soffset's of stack variables so they 5065 * are all relative to the frame pointer. 
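 *
 * Worked example (illustrative numbers only): a SCparameter at Soffset 8
 * becomes 8 + Para.size, a SCfastpar at Soffset 0 becomes Fast.size + BPoff,
 * and a SCauto at Soffset -4 becomes Auto.size + BPoff - 4, matching the
 * switch below.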
5066 */ 5067 5068 version (MARS) 5069 { 5070 void cod3_adjSymOffsets() 5071 { 5072 SYMIDX si; 5073 5074 //printf("cod3_adjSymOffsets()\n"); 5075 for (si = 0; si < globsym.length; si++) 5076 { 5077 //printf("\tglobsym[%d] = %p\n",si,globsym[si]); 5078 Symbol *s = globsym[si]; 5079 5080 switch (s.Sclass) 5081 { 5082 case SCparameter: 5083 case SCregpar: 5084 case SCshadowreg: 5085 //printf("s = '%s', Soffset = x%x, Para.size = x%x, EBPtoESP = x%x\n", s.Sident, s.Soffset, Para.size, EBPtoESP); 5086 s.Soffset += Para.size; 5087 if (0 && !(funcsym_p.Sfunc.Fflags3 & Fmember)) 5088 { 5089 if (!hasframe) 5090 s.Soffset += EBPtoESP; 5091 if (funcsym_p.Sfunc.Fflags3 & Fnested) 5092 s.Soffset += REGSIZE; 5093 } 5094 break; 5095 5096 case SCfastpar: 5097 //printf("\tfastpar %s %p Soffset %x Fast.size %x BPoff %x\n", s.Sident, s, (int)s.Soffset, (int)Fast.size, (int)BPoff); 5098 s.Soffset += Fast.size + BPoff; 5099 break; 5100 5101 case SCauto: 5102 case SCregister: 5103 if (s.Sfl == FLfast) 5104 s.Soffset += Fast.size + BPoff; 5105 else 5106 //printf("s = '%s', Soffset = x%x, Auto.size = x%x, BPoff = x%x EBPtoESP = x%x\n", s.Sident, (int)s.Soffset, (int)Auto.size, (int)BPoff, (int)EBPtoESP); 5107 // if (!(funcsym_p.Sfunc.Fflags3 & Fnested)) 5108 s.Soffset += Auto.size + BPoff; 5109 break; 5110 5111 case SCbprel: 5112 break; 5113 5114 default: 5115 continue; 5116 } 5117 static if (0) 5118 { 5119 if (!hasframe) 5120 s.Soffset += EBPtoESP; 5121 } 5122 } 5123 } 5124 5125 } 5126 5127 /******************************* 5128 * Take symbol info in union ev and replace it with a real address 5129 * in Vpointer. 5130 */ 5131 5132 void assignaddr(block *bl) 5133 { 5134 int EBPtoESPsave = EBPtoESP; 5135 int hasframesave = hasframe; 5136 5137 if (bl.Bflags & BFLoutsideprolog) 5138 { 5139 EBPtoESP = -REGSIZE; 5140 hasframe = 0; 5141 } 5142 assignaddrc(bl.Bcode); 5143 hasframe = hasframesave; 5144 EBPtoESP = EBPtoESPsave; 5145 } 5146 5147 void assignaddrc(code *c) 5148 { 5149 int sn; 5150 Symbol *s; 5151 ubyte ins,rm; 5152 targ_size_t soff; 5153 targ_size_t base; 5154 5155 base = EBPtoESP; 5156 for (; c; c = code_next(c)) 5157 { 5158 debug 5159 { 5160 if (0) 5161 { printf("assignaddrc()\n"); 5162 code_print(c); 5163 } 5164 if (code_next(c) && code_next(code_next(c)) == c) 5165 assert(0); 5166 } 5167 5168 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 5169 ins = vex_inssize(c); 5170 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 5171 ins = inssize2[(c.Iop >> 8) & 0xFF]; 5172 else if ((c.Iop & 0xFF00) == 0x0F00) 5173 ins = inssize2[c.Iop & 0xFF]; 5174 else if ((c.Iop & 0xFF) == ESCAPE) 5175 { 5176 if (c.Iop == (ESCAPE | ESCadjesp)) 5177 { 5178 //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,(long)c.IEV1.Vint); 5179 EBPtoESP += c.IEV1.Vint; 5180 c.Iop = NOP; 5181 } 5182 else if (c.Iop == (ESCAPE | ESCfixesp)) 5183 { 5184 //printf("fix ESP\n"); 5185 if (hasframe) 5186 { 5187 // LEA ESP,-EBPtoESP[EBP] 5188 c.Iop = LEA; 5189 if (c.Irm & 8) 5190 c.Irex |= REX_R; 5191 c.Irm = modregrm(2,SP,BP); 5192 c.Iflags = CFoff; 5193 c.IFL1 = FLconst; 5194 c.IEV1.Vuns = -EBPtoESP; 5195 if (enforcealign) 5196 { 5197 // AND ESP, -STACKALIGN 5198 code *cn = code_calloc(); 5199 cn.Iop = 0x81; 5200 cn.Irm = modregrm(3, 4, SP); 5201 cn.Iflags = CFoff; 5202 cn.IFL2 = FLconst; 5203 cn.IEV2.Vsize_t = -STACKALIGN; 5204 if (I64) 5205 c.Irex |= REX_W; 5206 cn.next = c.next; 5207 c.next = cn; 5208 } 5209 } 5210 } 5211 else if (c.Iop == (ESCAPE | ESCframeptr)) 5212 { // Convert to load of frame pointer 5213 // c.Irm is the register to use 5214 if 
(hasframe && !enforcealign) 5215 { // MOV reg,EBP 5216 c.Iop = 0x89; 5217 if (c.Irm & 8) 5218 c.Irex |= REX_B; 5219 c.Irm = modregrm(3,BP,c.Irm & 7); 5220 } 5221 else 5222 { // LEA reg,EBPtoESP[ESP] 5223 c.Iop = LEA; 5224 if (c.Irm & 8) 5225 c.Irex |= REX_R; 5226 c.Irm = modregrm(2,c.Irm & 7,4); 5227 c.Isib = modregrm(0,4,SP); 5228 c.Iflags = CFoff; 5229 c.IFL1 = FLconst; 5230 c.IEV1.Vuns = EBPtoESP; 5231 } 5232 } 5233 if (I64) 5234 c.Irex |= REX_W; 5235 continue; 5236 } 5237 else 5238 ins = inssize[c.Iop & 0xFF]; 5239 if (!(ins & M) || 5240 ((rm = c.Irm) & 0xC0) == 0xC0) 5241 goto do2; /* if no first operand */ 5242 if (is32bitaddr(I32,c.Iflags)) 5243 { 5244 5245 if ( 5246 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5)) 5247 ) 5248 goto do2; /* if no first operand */ 5249 } 5250 else 5251 { 5252 if ( 5253 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 5254 ) 5255 goto do2; /* if no first operand */ 5256 } 5257 s = c.IEV1.Vsym; 5258 switch (c.IFL1) 5259 { 5260 case FLdata: 5261 if (config.objfmt == OBJ_OMF && s.Sclass != SCcomdat && s.Sclass != SCextern) 5262 { 5263 version (MARS) 5264 { 5265 c.IEV1.Vseg = s.Sseg; 5266 } 5267 else 5268 { 5269 c.IEV1.Vseg = DATA; 5270 } 5271 c.IEV1.Vpointer += s.Soffset; 5272 c.IFL1 = FLdatseg; 5273 } 5274 else 5275 c.IFL1 = FLextern; 5276 goto do2; 5277 5278 case FLudata: 5279 if (config.objfmt == OBJ_OMF) 5280 { 5281 version (MARS) 5282 { 5283 c.IEV1.Vseg = s.Sseg; 5284 } 5285 else 5286 { 5287 c.IEV1.Vseg = UDATA; 5288 } 5289 c.IEV1.Vpointer += s.Soffset; 5290 c.IFL1 = FLdatseg; 5291 } 5292 else 5293 c.IFL1 = FLextern; 5294 goto do2; 5295 5296 case FLtlsdata: 5297 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5298 c.IFL1 = FLextern; 5299 goto do2; 5300 5301 case FLdatseg: 5302 //c.IEV1.Vseg = DATA; 5303 goto do2; 5304 5305 case FLfardata: 5306 case FLcsdata: 5307 case FLpseudo: 5308 goto do2; 5309 5310 case FLstack: 5311 //printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n", 5312 //s.Soffset,EBPtoESP,base,c.IEV1.Vpointer); 5313 c.IEV1.Vpointer += s.Soffset + EBPtoESP - base - EEStack.offset; 5314 break; 5315 5316 case FLfast: 5317 soff = Fast.size; 5318 goto L1; 5319 5320 case FLreg: 5321 case FLauto: 5322 soff = Auto.size; 5323 L1: 5324 if (Symbol_Sisdead(s, anyiasm)) 5325 { 5326 c.Iop = NOP; // remove references to it 5327 continue; 5328 } 5329 if (s.Sfl == FLreg && c.IEV1.Vpointer < 2) 5330 { 5331 reg_t reg = s.Sreglsw; 5332 5333 assert(!(s.Sregm & ~mask(reg))); 5334 if (c.IEV1.Vpointer == 1) 5335 { 5336 assert(reg < 4); /* must be a BYTEREGS */ 5337 reg |= 4; /* convert to high byte reg */ 5338 } 5339 if (reg & 8) 5340 { 5341 assert(I64); 5342 c.Irex |= REX_B; 5343 reg &= 7; 5344 } 5345 c.Irm = (c.Irm & modregrm(0,7,0)) 5346 | modregrm(3,0,reg); 5347 assert(c.Iop != LES && c.Iop != LEA); 5348 goto do2; 5349 } 5350 else 5351 { c.IEV1.Vpointer += s.Soffset + soff + BPoff; 5352 if (s.Sflags & SFLunambig) 5353 c.Iflags |= CFunambig; 5354 L2: 5355 if (!hasframe || (enforcealign && c.IFL1 != FLpara)) 5356 { /* Convert to ESP relative address instead of EBP */ 5357 assert(!I16); 5358 c.IEV1.Vpointer += EBPtoESP; 5359 ubyte crm = c.Irm; 5360 if ((crm & 7) == 4) // if SIB byte 5361 { 5362 assert((c.Isib & 7) == BP); 5363 assert((crm & 0xC0) != 0); 5364 c.Isib = (c.Isib & ~7) | modregrm(0,0,SP); 5365 } 5366 else 5367 { 5368 assert((crm & 7) == 5); 5369 c.Irm = (crm & modregrm(0,7,0)) 5370 | modregrm(2,0,4); 5371 c.Isib = modregrm(0,4,SP); 5372 } 5373 } 5374 } 5375 break; 5376 5377 case FLpara: 5378 //printf("s = %s, 
Soffset = %d, Para.size = %d, BPoff = %d, EBPtoESP = %d\n", s.Sident.ptr, s.Soffset, Para.size, BPoff, EBPtoESP); 5379 soff = Para.size - BPoff; // cancel out add of BPoff 5380 goto L1; 5381 5382 case FLfltreg: 5383 c.IEV1.Vpointer += Foff + BPoff; 5384 c.Iflags |= CFunambig; 5385 goto L2; 5386 5387 case FLallocatmp: 5388 c.IEV1.Vpointer += Alloca.offset + BPoff; 5389 goto L2; 5390 5391 case FLfuncarg: 5392 c.IEV1.Vpointer += cgstate.funcarg.offset + BPoff; 5393 goto L2; 5394 5395 case FLbprel: 5396 c.IEV1.Vpointer += s.Soffset; 5397 break; 5398 5399 case FLcs: 5400 sn = c.IEV1.Vuns; 5401 if (!CSE.loaded(sn)) // if never loaded 5402 { 5403 c.Iop = NOP; 5404 continue; 5405 } 5406 c.IEV1.Vpointer = CSE.offset(sn) + CSoff + BPoff; 5407 c.Iflags |= CFunambig; 5408 goto L2; 5409 5410 case FLregsave: 5411 sn = c.IEV1.Vuns; 5412 c.IEV1.Vpointer = sn + regsave.off + BPoff; 5413 c.Iflags |= CFunambig; 5414 goto L2; 5415 5416 case FLndp: 5417 version (MARS) 5418 { 5419 assert(c.IEV1.Vuns < global87.save.length); 5420 } 5421 c.IEV1.Vpointer = c.IEV1.Vuns * tysize(TYldouble) + NDPoff + BPoff; 5422 c.Iflags |= CFunambig; 5423 goto L2; 5424 5425 case FLoffset: 5426 break; 5427 5428 case FLlocalsize: 5429 c.IEV1.Vpointer += localsize; 5430 break; 5431 5432 case FLconst: 5433 default: 5434 goto do2; 5435 } 5436 c.IFL1 = FLconst; 5437 do2: 5438 /* Ignore TEST (F6 and F7) opcodes */ 5439 if (!(ins & T)) goto done; /* if no second operand */ 5440 s = c.IEV2.Vsym; 5441 switch (c.IFL2) 5442 { 5443 case FLdata: 5444 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5445 { 5446 c.IFL2 = FLextern; 5447 goto do2; 5448 } 5449 else 5450 { 5451 if (s.Sclass == SCcomdat) 5452 { c.IFL2 = FLextern; 5453 goto do2; 5454 } 5455 c.IEV2.Vseg = MARS ? s.Sseg : DATA; 5456 c.IEV2.Vpointer += s.Soffset; 5457 c.IFL2 = FLdatseg; 5458 goto done; 5459 } 5460 5461 case FLudata: 5462 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5463 { 5464 c.IFL2 = FLextern; 5465 goto do2; 5466 } 5467 else 5468 { 5469 c.IEV2.Vseg = MARS ? 
s.Sseg : UDATA; 5470 c.IEV2.Vpointer += s.Soffset; 5471 c.IFL2 = FLdatseg; 5472 goto done; 5473 } 5474 5475 case FLtlsdata: 5476 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5477 { 5478 c.IFL2 = FLextern; 5479 goto do2; 5480 } 5481 goto done; 5482 5483 case FLdatseg: 5484 //c.IEV2.Vseg = DATA; 5485 goto done; 5486 5487 case FLcsdata: 5488 case FLfardata: 5489 goto done; 5490 5491 case FLreg: 5492 case FLpseudo: 5493 assert(0); 5494 /* NOTREACHED */ 5495 5496 case FLfast: 5497 c.IEV2.Vpointer += s.Soffset + Fast.size + BPoff; 5498 break; 5499 5500 case FLauto: 5501 c.IEV2.Vpointer += s.Soffset + Auto.size + BPoff; 5502 L3: 5503 if (!hasframe || (enforcealign && c.IFL2 != FLpara)) 5504 /* Convert to ESP relative address instead of EBP */ 5505 c.IEV2.Vpointer += EBPtoESP; 5506 break; 5507 5508 case FLpara: 5509 c.IEV2.Vpointer += s.Soffset + Para.size; 5510 goto L3; 5511 5512 case FLfltreg: 5513 c.IEV2.Vpointer += Foff + BPoff; 5514 goto L3; 5515 5516 case FLallocatmp: 5517 c.IEV2.Vpointer += Alloca.offset + BPoff; 5518 goto L3; 5519 5520 case FLfuncarg: 5521 c.IEV2.Vpointer += cgstate.funcarg.offset + BPoff; 5522 goto L3; 5523 5524 case FLbprel: 5525 c.IEV2.Vpointer += s.Soffset; 5526 break; 5527 5528 case FLstack: 5529 c.IEV2.Vpointer += s.Soffset + EBPtoESP - base; 5530 break; 5531 5532 case FLcs: 5533 case FLndp: 5534 case FLregsave: 5535 assert(0); 5536 5537 case FLconst: 5538 break; 5539 5540 case FLlocalsize: 5541 c.IEV2.Vpointer += localsize; 5542 break; 5543 5544 default: 5545 goto done; 5546 } 5547 c.IFL2 = FLconst; 5548 done: 5549 { } 5550 } 5551 } 5552 5553 /******************************* 5554 * Return offset from BP of symbol s. 5555 */ 5556 5557 targ_size_t cod3_bpoffset(Symbol *s) 5558 { 5559 targ_size_t offset; 5560 5561 symbol_debug(s); 5562 offset = s.Soffset; 5563 switch (s.Sfl) 5564 { 5565 case FLpara: 5566 offset += Para.size; 5567 break; 5568 5569 case FLfast: 5570 offset += Fast.size + BPoff; 5571 break; 5572 5573 case FLauto: 5574 offset += Auto.size + BPoff; 5575 break; 5576 5577 default: 5578 WRFL(cast(FL)s.Sfl); 5579 symbol_print(s); 5580 assert(0); 5581 } 5582 assert(hasframe); 5583 return offset; 5584 } 5585 5586 5587 /******************************* 5588 * Find shorter versions of the same instructions. 5589 * Does these optimizations: 5590 * replaces jmps to the next instruction with NOPs 5591 * sign extension of modregrm displacement 5592 * sign extension of immediate data (can't do it for OR, AND, XOR 5593 * as the opcodes are not defined) 5594 * short versions for AX EA 5595 * short versions for reg EA 5596 * Code is neither removed nor added. 
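 *
 * Illustrative before/after encodings (sketches of cases handled in the body):
 *      CMP reg,0       81 /7 imm32   =>  TEST reg,reg    85 /r
 *      AND reg,0       81 /4 imm32   =>  XOR reg,reg     31 /r
 *      ADD reg,imm32   81 /0 imm32   =>  ADD reg,imm8    83 /0 ib   (if imm fits in a signed byte)
 *      SHL reg,1       C1 /4 01      =>  D1 /4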
5597 * Params: 5598 * b = block for code (or null) 5599 * c = code list to optimize 5600 */ 5601 5602 void pinholeopt(code *c,block *b) 5603 { 5604 targ_size_t a; 5605 uint mod; 5606 ubyte ins; 5607 int usespace; 5608 int useopsize; 5609 int space; 5610 block *bn; 5611 5612 debug 5613 { 5614 __gshared int tested; if (!tested) { tested++; pinholeopt_unittest(); } 5615 } 5616 5617 debug 5618 { 5619 code *cstart = c; 5620 if (debugc) 5621 { 5622 printf("+pinholeopt(%p)\n",c); 5623 } 5624 } 5625 5626 if (b) 5627 { 5628 bn = b.Bnext; 5629 usespace = (config.flags4 & CFG4space && b.BC != BCasm); 5630 useopsize = (I16 || (config.flags4 & CFG4space && b.BC != BCasm)); 5631 } 5632 else 5633 { 5634 bn = null; 5635 usespace = (config.flags4 & CFG4space); 5636 useopsize = (I16 || config.flags4 & CFG4space); 5637 } 5638 for (; c; c = code_next(c)) 5639 { 5640 L1: 5641 opcode_t op = c.Iop; 5642 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 5643 ins = vex_inssize(c); 5644 else if ((op & 0xFFFD00) == 0x0F3800) 5645 ins = inssize2[(op >> 8) & 0xFF]; 5646 else if ((op & 0xFF00) == 0x0F00) 5647 ins = inssize2[op & 0xFF]; 5648 else 5649 ins = inssize[op & 0xFF]; 5650 if (ins & M) // if modregrm byte 5651 { 5652 int shortop = (c.Iflags & CFopsize) ? !I16 : I16; 5653 int local_BPRM = BPRM; 5654 5655 if (c.Iflags & CFaddrsize) 5656 local_BPRM ^= 5 ^ 6; // toggle between 5 and 6 5657 5658 uint rm = c.Irm; 5659 reg_t reg = rm & modregrm(0,7,0); // isolate reg field 5660 reg_t ereg = rm & 7; 5661 //printf("c = %p, op = %02x rm = %02x\n", c, op, rm); 5662 5663 /* If immediate second operand */ 5664 if ((ins & T || 5665 ((op == 0xF6 || op == 0xF7) && (reg < modregrm(0,2,0) || reg > modregrm(0,3,0))) 5666 ) && 5667 c.IFL2 == FLconst) 5668 { 5669 int flags = c.Iflags & CFpsw; /* if want result in flags */ 5670 targ_long u = c.IEV2.Vuns; 5671 if (ins & E) 5672 u = cast(byte) u; 5673 else if (shortop) 5674 u = cast(short) u; 5675 5676 // Replace CMP reg,0 with TEST reg,reg 5677 if ((op & 0xFE) == 0x80 && // 80 is CMP R8,imm8; 81 is CMP reg,imm 5678 rm >= modregrm(3,7,AX) && 5679 u == 0) 5680 { 5681 c.Iop = (op & 1) | 0x84; 5682 c.Irm = modregrm(3,ereg,ereg); 5683 if (c.Irex & REX_B) 5684 c.Irex |= REX_R; 5685 goto L1; 5686 } 5687 5688 /* Optimize ANDs with an immediate constant */ 5689 if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0)) 5690 { 5691 if (rm >= modregrm(3,4,AX)) // AND reg,imm 5692 { 5693 if (u == 0) 5694 { 5695 /* Replace with XOR reg,reg */ 5696 c.Iop = 0x30 | (op & 1); 5697 c.Irm = modregrm(3,ereg,ereg); 5698 if (c.Irex & REX_B) 5699 c.Irex |= REX_R; 5700 goto L1; 5701 } 5702 if (u == 0xFFFFFFFF && !flags) 5703 { 5704 c.Iop = NOP; 5705 goto L1; 5706 } 5707 } 5708 if (op == 0x81 && !flags) 5709 { // If we can do the operation in one byte 5710 5711 // If EA is not SI or DI 5712 if ((rm < modregrm(3,4,SP) || I64) && 5713 (config.flags4 & CFG4space || 5714 config.target_cpu < TARGET_PentiumPro) 5715 ) 5716 { 5717 if ((u & 0xFFFFFF00) == 0xFFFFFF00) 5718 goto L2; 5719 else if (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4)) 5720 { 5721 if (!shortop) 5722 { 5723 if ((u & 0xFFFF00FF) == 0xFFFF00FF) 5724 goto L3; 5725 } 5726 else 5727 { 5728 if ((u & 0xFF) == 0xFF) 5729 goto L3; 5730 } 5731 } 5732 } 5733 if (!shortop && useopsize) 5734 { 5735 if ((u & 0xFFFF0000) == 0xFFFF0000) 5736 { 5737 c.Iflags ^= CFopsize; 5738 goto L1; 5739 } 5740 if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX)) 5741 { 5742 c.IEV1.Voffset += 2; /* address MSW */ 5743 c.IEV2.Vuns >>= 16; 5744 c.Iflags ^= CFopsize; 5745 goto L1; 5746 
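                        // The case just above rewrites, for example,
                        //      AND dword ptr [mem],0x1234FFFF
                        // as   AND word ptr 2[mem],0x1234
                        // since the low word is all ones and needs no AND.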
} 5747 if (rm >= modregrm(3,4,AX)) 5748 { 5749 if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64)) 5750 { 5751 c.Iop = MOVZXb; // MOVZX 5752 c.Irm = modregrm(3,ereg,ereg); 5753 if (c.Irex & REX_B) 5754 c.Irex |= REX_R; 5755 goto L1; 5756 } 5757 if (u == 0xFFFF) 5758 { 5759 c.Iop = MOVZXw; // MOVZX 5760 c.Irm = modregrm(3,ereg,ereg); 5761 if (c.Irex & REX_B) 5762 c.Irex |= REX_R; 5763 goto L1; 5764 } 5765 } 5766 } 5767 } 5768 } 5769 5770 /* Look for ADD,OR,SUB,XOR with u that we can eliminate */ 5771 if (!flags && 5772 (op == 0x81 || op == 0x80) && 5773 (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || // ADD,OR 5774 reg == modregrm(0,5,0) || reg == modregrm(0,6,0)) // SUB, XOR 5775 ) 5776 { 5777 if (u == 0) 5778 { 5779 c.Iop = NOP; 5780 goto L1; 5781 } 5782 if (u == ~0 && reg == modregrm(0,6,0)) /* XOR */ 5783 { 5784 c.Iop = 0xF6 | (op & 1); /* NOT */ 5785 c.Irm ^= modregrm(0,6^2,0); 5786 goto L1; 5787 } 5788 if (!shortop && 5789 useopsize && 5790 op == 0x81 && 5791 (u & 0xFFFF0000) == 0 && 5792 (reg == modregrm(0,6,0) || reg == modregrm(0,1,0))) 5793 { 5794 c.Iflags ^= CFopsize; 5795 goto L1; 5796 } 5797 } 5798 5799 /* Look for TEST or OR or XOR with an immediate constant */ 5800 /* that we can replace with a byte operation */ 5801 if (op == 0xF7 && reg == modregrm(0,0,0) || 5802 op == 0x81 && reg == modregrm(0,6,0) && !flags || 5803 op == 0x81 && reg == modregrm(0,1,0)) 5804 { 5805 // See if we can replace a dword with a word 5806 // (avoid for 32 bit instructions, because CFopsize 5807 // is too slow) 5808 if (!shortop && useopsize) 5809 { 5810 if ((u & 0xFFFF0000) == 0) 5811 { 5812 c.Iflags ^= CFopsize; 5813 goto L1; 5814 } 5815 /* If memory (not register) addressing mode */ 5816 if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX)) 5817 { 5818 c.IEV1.Voffset += 2; /* address MSW */ 5819 c.IEV2.Vuns >>= 16; 5820 c.Iflags ^= CFopsize; 5821 goto L1; 5822 } 5823 } 5824 5825 // If EA is not SI or DI 5826 if (rm < (modregrm(3,0,SP) | reg) && 5827 (usespace || 5828 config.target_cpu < TARGET_PentiumPro) 5829 ) 5830 { 5831 if ((u & 0xFFFFFF00) == 0) 5832 { 5833 L2: c.Iop--; /* to byte instruction */ 5834 c.Iflags &= ~CFopsize; 5835 goto L1; 5836 } 5837 if (((u & 0xFFFF00FF) == 0 || 5838 (shortop && (u & 0xFF) == 0)) && 5839 (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4))) 5840 { 5841 L3: 5842 c.IEV2.Vuns >>= 8; 5843 if (rm >= (modregrm(3,0,AX) | reg)) 5844 c.Irm |= 4; /* AX.AH, BX.BH, etc. */ 5845 else 5846 c.IEV1.Voffset += 1; 5847 goto L2; 5848 } 5849 } 5850 5851 // BUG: which is right? 
5852 //else if ((u & 0xFFFF0000) == 0) 5853 5854 else if (0 && op == 0xF7 && 5855 rm >= modregrm(3,0,SP) && 5856 (u & 0xFFFF0000) == 0) 5857 5858 c.Iflags &= ~CFopsize; 5859 } 5860 5861 // Try to replace TEST reg,-1 with TEST reg,reg 5862 if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8 5863 { 5864 if ((u & 0xFF) == 0xFF) 5865 { 5866 L4: 5867 c.Iop = 0x84; // TEST regL,regL 5868 c.Irm = modregrm(3,ereg,ereg); 5869 if (c.Irex & REX_B) 5870 c.Irex |= REX_R; 5871 c.Iflags &= ~CFopsize; 5872 goto L1; 5873 } 5874 } 5875 if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4)) 5876 { 5877 if (u == 0xFF) 5878 { 5879 if (ereg & 4) // SIL,DIL,BPL,SPL need REX prefix 5880 c.Irex |= REX; 5881 goto L4; 5882 } 5883 if ((u & 0xFFFF) == 0xFF00 && shortop && !c.Irex && ereg < 4) 5884 { 5885 ereg |= 4; /* to regH */ 5886 goto L4; 5887 } 5888 } 5889 5890 /* Look for sign extended immediate data */ 5891 if (cast(byte) u == u) 5892 { 5893 if (op == 0x81) 5894 { 5895 if (reg != 0x08 && reg != 0x20 && reg != 0x30) 5896 c.Iop = op = 0x83; /* 8 bit sgn ext */ 5897 } 5898 else if (op == 0x69) /* IMUL rw,ew,dw */ 5899 c.Iop = op = 0x6B; /* IMUL rw,ew,db */ 5900 } 5901 5902 // Look for SHIFT EA,imm8 we can replace with short form 5903 if (u == 1 && ((op & 0xFE) == 0xC0)) 5904 c.Iop |= 0xD0; 5905 5906 } /* if immediate second operand */ 5907 5908 /* Look for AX short form */ 5909 if (ins & A) 5910 { 5911 if (rm == modregrm(0,AX,local_BPRM) && 5912 !(c.Irex & REX_R) && // and it's AX, not R8 5913 (op & ~3) == 0x88 && 5914 !I64) 5915 { 5916 op = ((op & 3) + 0xA0) ^ 2; 5917 /* 8A. A0 */ 5918 /* 8B. A1 */ 5919 /* 88. A2 */ 5920 /* 89. A3 */ 5921 c.Iop = op; 5922 c.IFL2 = c.IFL1; 5923 c.IEV2 = c.IEV1; 5924 } 5925 5926 /* Replace MOV REG1,REG2 with MOV EREG1,EREG2 */ 5927 else if (!I16 && 5928 (op == 0x89 || op == 0x8B) && 5929 (rm & 0xC0) == 0xC0 && 5930 (!b || b.BC != BCasm) 5931 ) 5932 c.Iflags &= ~CFopsize; 5933 5934 // If rm is AX 5935 else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c.Irex & (REX_R | REX_B))) 5936 { 5937 switch (op) 5938 { 5939 case 0x80: op = reg | 4; break; 5940 case 0x81: op = reg | 5; break; 5941 case 0x87: op = 0x90 + (reg>>3); break; // XCHG 5942 5943 case 0xF6: 5944 if (reg == 0) 5945 op = 0xA8; /* TEST AL,immed8 */ 5946 break; 5947 5948 case 0xF7: 5949 if (reg == 0) 5950 op = 0xA9; /* TEST AX,immed16 */ 5951 break; 5952 5953 default: 5954 break; 5955 } 5956 c.Iop = op; 5957 } 5958 } 5959 5960 /* Look for reg short form */ 5961 if ((ins & R) && (rm & 0xC0) == 0xC0) 5962 { 5963 switch (op) 5964 { 5965 case 0xC6: op = 0xB0 + ereg; break; 5966 case 0xC7: // if no sign extension 5967 if (!(c.Irex & REX_W && c.IEV2.Vint < 0)) 5968 { 5969 c.Irm = 0; 5970 c.Irex &= ~REX_W; 5971 op = 0xB8 + ereg; 5972 } 5973 break; 5974 5975 case 0xFF: 5976 switch (reg) 5977 { case 6<<3: op = 0x50+ereg; break;/* PUSH*/ 5978 case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/ 5979 case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/ 5980 default: break; 5981 } 5982 break; 5983 5984 case 0x8F: op = 0x58 + ereg; break; 5985 case 0x87: 5986 if (reg == 0 && !(c.Irex & (REX_R | REX_B))) // Issue 12968: Needed to ensure it's referencing RAX, not R8 5987 op = 0x90 + ereg; 5988 break; 5989 5990 default: 5991 break; 5992 } 5993 c.Iop = op; 5994 } 5995 5996 // Look to remove redundant REX prefix on XOR 5997 if (c.Irex == REX_W // ignore ops involving R8..R15 5998 && (op == 0x31 || op == 0x33) // XOR 5999 && ((rm & 0xC0) == 0xC0) // register direct 6000 
&& ((reg >> 3) == ereg)) // register with itself 6001 { 6002 c.Irex = 0; 6003 } 6004 6005 // Look to replace SHL reg,1 with ADD reg,reg 6006 if ((op & ~1) == 0xD0 && 6007 (rm & modregrm(3,7,0)) == modregrm(3,4,0) && 6008 config.target_cpu >= TARGET_80486) 6009 { 6010 c.Iop &= 1; 6011 c.Irm = cast(ubyte)((rm & modregrm(3,0,7)) | (ereg << 3)); 6012 if (c.Irex & REX_B) 6013 c.Irex |= REX_R; 6014 if (!(c.Iflags & CFpsw) && !I16) 6015 c.Iflags &= ~CFopsize; 6016 goto L1; 6017 } 6018 6019 /* Look for sign extended modregrm displacement, or 0 6020 * displacement. 6021 */ 6022 6023 if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp 6024 c.IFL1 == FLconst) // and it's a constant 6025 { 6026 a = c.IEV1.Vpointer; 6027 if (a == 0 && (rm & 7) != local_BPRM && // if 0[disp] 6028 !(local_BPRM == 5 && (rm & 7) == 4 && (c.Isib & 7) == BP) 6029 ) 6030 c.Irm &= 0x3F; 6031 else if (!I16) 6032 { 6033 if (cast(targ_size_t)cast(targ_schar)a == a) 6034 c.Irm ^= 0xC0; /* do 8 sx */ 6035 } 6036 else if ((cast(targ_size_t)cast(targ_schar)a & 0xFFFF) == (a & 0xFFFF)) 6037 c.Irm ^= 0xC0; /* do 8 sx */ 6038 } 6039 6040 /* Look for LEA reg,[ireg], replace with MOV reg,ireg */ 6041 if (op == LEA) 6042 { 6043 rm = c.Irm & 7; 6044 mod = c.Irm & modregrm(3,0,0); 6045 if (mod == 0) 6046 { 6047 if (!I16) 6048 { 6049 switch (rm) 6050 { 6051 case 4: 6052 case 5: 6053 break; 6054 6055 default: 6056 c.Irm |= modregrm(3,0,0); 6057 c.Iop = 0x8B; 6058 break; 6059 } 6060 } 6061 else 6062 { 6063 switch (rm) 6064 { 6065 case 4: rm = modregrm(3,0,SI); goto L6; 6066 case 5: rm = modregrm(3,0,DI); goto L6; 6067 case 7: rm = modregrm(3,0,BX); goto L6; 6068 L6: c.Irm = cast(ubyte)(rm + reg); 6069 c.Iop = 0x8B; 6070 break; 6071 6072 default: 6073 break; 6074 } 6075 } 6076 } 6077 6078 /* replace LEA reg,0[BP] with MOV reg,BP */ 6079 else if (mod == modregrm(1,0,0) && rm == local_BPRM && 6080 c.IFL1 == FLconst && c.IEV1.Vpointer == 0) 6081 { 6082 c.Iop = 0x8B; /* MOV reg,BP */ 6083 c.Irm = cast(ubyte)(modregrm(3,0,BP) + reg); 6084 } 6085 } 6086 6087 // Replace [R13] with 0[R13] 6088 if (c.Irex & REX_B && ((c.Irm & modregrm(3,0,7)) == modregrm(0,0,BP) || 6089 issib(c.Irm) && (c.Irm & modregrm(3,0,0)) == 0 && (c.Isib & 7) == BP)) 6090 { 6091 c.Irm |= modregrm(1,0,0); 6092 c.IFL1 = FLconst; 6093 c.IEV1.Vpointer = 0; 6094 } 6095 } 6096 else if (!(c.Iflags & CFvex)) 6097 { 6098 switch (op) 6099 { 6100 default: 6101 // Look for MOV r64, immediate 6102 if ((c.Irex & REX_W) && (op & ~7) == 0xB8) 6103 { 6104 /* Look for zero extended immediate data */ 6105 if (c.IEV2.Vsize_t == c.IEV2.Vuns) 6106 { 6107 c.Irex &= ~REX_W; 6108 } 6109 /* Look for sign extended immediate data */ 6110 else if (c.IEV2.Vsize_t == c.IEV2.Vint) 6111 { 6112 c.Irm = modregrm(3,0,op & 7); 6113 c.Iop = op = 0xC7; 6114 c.IEV2.Vsize_t = c.IEV2.Vuns; 6115 } 6116 } 6117 if ((op & ~0x0F) != 0x70) 6118 break; 6119 goto case JMP; 6120 6121 case JMP: 6122 switch (c.IFL2) 6123 { 6124 case FLcode: 6125 if (c.IEV2.Vcode == code_next(c)) 6126 { 6127 c.Iop = NOP; 6128 continue; 6129 } 6130 break; 6131 6132 case FLblock: 6133 if (!code_next(c) && c.IEV2.Vblock == bn) 6134 { 6135 c.Iop = NOP; 6136 continue; 6137 } 6138 break; 6139 6140 case FLconst: 6141 case FLfunc: 6142 case FLextern: 6143 break; 6144 6145 default: 6146 WRFL(cast(FL)c.IFL2); 6147 assert(0); 6148 } 6149 break; 6150 6151 case 0x68: // PUSH immed16 6152 if (c.IFL2 == FLconst) 6153 { 6154 targ_long u = c.IEV2.Vuns; 6155 if (I64 || 6156 ((c.Iflags & CFopsize) ? 
I16 : I32)) 6157 { // PUSH 32/64 bit operand 6158 if (u == cast(byte) u) 6159 c.Iop = 0x6A; // PUSH immed8 6160 } 6161 else // PUSH 16 bit operand 6162 { 6163 if (cast(short)u == cast(byte) u) 6164 c.Iop = 0x6A; // PUSH immed8 6165 } 6166 } 6167 break; 6168 } 6169 } 6170 } 6171 6172 debug 6173 if (debugc) 6174 { 6175 printf("-pinholeopt(%p)\n",cstart); 6176 for (c = cstart; c; c = code_next(c)) 6177 code_print(c); 6178 } 6179 } 6180 6181 6182 debug 6183 { 6184 private void pinholeopt_unittest() 6185 { 6186 //printf("pinholeopt_unittest()\n"); 6187 static struct CS 6188 { 6189 uint model,op,ea; 6190 targ_size_t ev1,ev2; 6191 uint flags; 6192 } 6193 __gshared CS[2][22] tests = 6194 [ 6195 // XOR reg,immed NOT regL 6196 [ { 16,0x81,modregrm(3,6,BX),0,0xFF,0 }, { 0,0xF6,modregrm(3,2,BX),0,0xFF } ], 6197 6198 // MOV 0[BX],3 MOV [BX],3 6199 [ { 16,0xC7,modregrm(2,0,7),0,3 }, { 0,0xC7,modregrm(0,0,7),0,3 } ], 6200 6201 /+ // only if config.flags4 & CFG4space 6202 // TEST regL,immed8 6203 [ { 0,0xF6,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }], 6204 [ { 0,0xF7,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }], 6205 [ { 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }], 6206 [ { 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }], 6207 +/ 6208 6209 // PUSH immed => PUSH immed8 6210 [ { 0,0x68,0,0,0 }, { 0,0x6A,0,0,0 }], 6211 [ { 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }], 6212 [ { 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }], 6213 [ { 16,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }], 6214 [ { 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }], 6215 [ { 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }], 6216 [ { 16,0x68,0,0,0x10000,0 }, { 0,0x6A,0,0,0x10000,0 }], 6217 [ { 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }], 6218 [ { 32,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }], 6219 [ { 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }], 6220 [ { 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }], 6221 [ { 32,0x68,0,0,0x10000,CFopsize }, { 0,0x6A,0,0,0x10000,CFopsize }], 6222 [ { 32,0x68,0,0,0x8000,CFopsize }, { 0,0x68,0,0,0x8000,CFopsize }], 6223 6224 // clear r64, for r64 != R8..R15 6225 [ { 64,0x31,0x800C0,0,0,0 }, { 0,0x31,0xC0,0,0,0}], 6226 [ { 64,0x33,0x800C0,0,0,0 }, { 0,0x33,0xC0,0,0,0}], 6227 6228 // MOV r64, immed 6229 [ { 64,0xC7,0x800C0,0,0xFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,0xFFFFFFFF,0}], 6230 [ { 64,0xC7,0x800C0,0,0x7FFFFFFF,0 }, { 0,0xB8,0,0,0x7FFFFFFF,0}], 6231 [ { 64,0xB8,0x80000,0,0xFFFFFFFF,0 }, { 0,0xB8,0,0,0xFFFFFFFF,0 }], 6232 [ { 64,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }, { 0,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }], 6233 [ { 64,0xB8,0x80000,0,cast(targ_size_t)0xFFFFFFFFFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,cast(targ_size_t)0xFFFFFFFF,0}], 6234 ]; 6235 6236 //config.flags4 |= CFG4space; 6237 for (int i = 0; i < tests.length; i++) 6238 { CS *pin = &tests[i][0]; 6239 CS *pout = &tests[i][1]; 6240 code cs = void; 6241 memset(&cs, 0, cs.sizeof); 6242 if (pin.model) 6243 { 6244 if (I16 && pin.model != 16) 6245 continue; 6246 if (I32 && pin.model != 32) 6247 continue; 6248 if (I64 && pin.model != 64) 6249 continue; 6250 } 6251 //printf("[%d]\n", i); 6252 cs.Iop = pin.op; 6253 cs.Iea = pin.ea; 6254 cs.IFL1 = FLconst; 6255 cs.IFL2 = FLconst; 6256 cs.IEV1.Vsize_t = pin.ev1; 6257 cs.IEV2.Vsize_t = pin.ev2; 6258 cs.Iflags = pin.flags; 6259 pinholeopt(&cs, null); 6260 if (cs.Iop != pout.op) 6261 { printf("[%d] Iop = x%02x, pout = x%02x\n", 
i, cs.Iop, pout.op); 6262 assert(0); 6263 } 6264 assert(cs.Iea == pout.ea); 6265 assert(cs.IEV1.Vsize_t == pout.ev1); 6266 assert(cs.IEV2.Vsize_t == pout.ev2); 6267 assert(cs.Iflags == pout.flags); 6268 } 6269 } 6270 } 6271 6272 void simplify_code(code* c) 6273 { 6274 reg_t reg; 6275 if (config.flags4 & CFG4optimized && 6276 (c.Iop == 0x81 || c.Iop == 0x80) && 6277 c.IFL2 == FLconst && 6278 reghasvalue((c.Iop == 0x80) ? BYTEREGS : ALLREGS,I64 ? c.IEV2.Vsize_t : c.IEV2.Vlong,®) && 6279 !(I16 && c.Iflags & CFopsize) 6280 ) 6281 { 6282 // See if we can replace immediate instruction with register instruction 6283 static immutable ubyte[8] regop = 6284 [ 0x00,0x08,0x10,0x18,0x20,0x28,0x30,0x38 ]; 6285 6286 //printf("replacing 0x%02x, val = x%lx\n",c.Iop,c.IEV2.Vlong); 6287 c.Iop = regop[(c.Irm & modregrm(0,7,0)) >> 3] | (c.Iop & 1); 6288 code_newreg(c, reg); 6289 if (I64 && !(c.Iop & 1) && (reg & 4)) 6290 c.Irex |= REX; 6291 } 6292 } 6293 6294 /************************** 6295 * Compute jump addresses for FLcode. 6296 * Note: only works for forward referenced code. 6297 * only direct jumps and branches are detected. 6298 * LOOP instructions only work for backward refs. 6299 */ 6300 6301 void jmpaddr(code *c) 6302 { 6303 code* ci,cn,ctarg,cstart; 6304 targ_size_t ad; 6305 6306 //printf("jmpaddr()\n"); 6307 cstart = c; /* remember start of code */ 6308 while (c) 6309 { 6310 const op = c.Iop; 6311 if (op <= 0xEB && 6312 inssize[op] & T && // if second operand 6313 c.IFL2 == FLcode && 6314 ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ || op == CALL)) 6315 { 6316 ci = code_next(c); 6317 ctarg = c.IEV2.Vcode; /* target code */ 6318 ad = 0; /* IP displacement */ 6319 while (ci && ci != ctarg) 6320 { 6321 ad += calccodsize(ci); 6322 ci = code_next(ci); 6323 } 6324 if (!ci) 6325 goto Lbackjmp; // couldn't find it 6326 if (!I16 || op == JMP || op == JMPS || op == JCXZ || op == CALL) 6327 c.IEV2.Vpointer = ad; 6328 else /* else conditional */ 6329 { 6330 if (!(c.Iflags & CFjmp16)) /* if branch */ 6331 c.IEV2.Vpointer = ad; 6332 else /* branch around a long jump */ 6333 { 6334 cn = code_next(c); 6335 c.next = code_calloc(); 6336 code_next(c).next = cn; 6337 c.Iop = op ^ 1; /* converse jmp */ 6338 c.Iflags &= ~CFjmp16; 6339 c.IEV2.Vpointer = I16 ? 3 : 5; 6340 cn = code_next(c); 6341 cn.Iop = JMP; /* long jump */ 6342 cn.IFL2 = FLconst; 6343 cn.IEV2.Vpointer = ad; 6344 } 6345 } 6346 c.IFL2 = FLconst; 6347 } 6348 if (op == LOOP && c.IFL2 == FLcode) /* backwards refs */ 6349 { 6350 Lbackjmp: 6351 ctarg = c.IEV2.Vcode; 6352 for (ci = cstart; ci != ctarg; ci = code_next(ci)) 6353 if (!ci || ci == c) 6354 assert(0); 6355 ad = 2; /* - IP displacement */ 6356 while (ci != c) 6357 { 6358 assert(ci); 6359 ad += calccodsize(ci); 6360 ci = code_next(ci); 6361 } 6362 c.IEV2.Vpointer = (-ad) & 0xFF; 6363 c.IFL2 = FLconst; 6364 } 6365 c = code_next(c); 6366 } 6367 } 6368 6369 /******************************* 6370 * Calculate bl.Bsize. 6371 */ 6372 6373 uint calcblksize(code *c) 6374 { 6375 uint size; 6376 for (size = 0; c; c = code_next(c)) 6377 { 6378 uint sz = calccodsize(c); 6379 //printf("off=%02x, sz = %d, code %p: op=%02x\n", size, sz, c, c.Iop); 6380 size += sz; 6381 } 6382 //printf("calcblksize(c = x%x) = %d\n", c, size); 6383 return size; 6384 } 6385 6386 /***************************** 6387 * Calculate and return code size of a code. 6388 * Note that NOPs are sometimes used as markers, but are 6389 * never output. LINNUMs are never output. 6390 * Note: This routine must be fast. 
Profiling shows it is significant. 6391 */ 6392 6393 uint calccodsize(code *c) 6394 { 6395 uint size; 6396 ubyte rm,mod,ins; 6397 uint iflags; 6398 uint i32 = I32 || I64; 6399 uint a32 = i32; 6400 6401 debug 6402 assert((a32 & ~1) == 0); 6403 6404 iflags = c.Iflags; 6405 opcode_t op = c.Iop; 6406 //printf("calccodsize(x%08x), Iflags = x%x\n", op, iflags); 6407 if (iflags & CFvex && c.Ivex.pfx == 0xC4) 6408 { 6409 ins = vex_inssize(c); 6410 size = ins & 7; 6411 goto Lmodrm; 6412 } 6413 else if ((op & 0xFF00) == 0x0F00 || (op & 0xFFFD00) == 0x0F3800) 6414 op = 0x0F; 6415 else 6416 op &= 0xFF; 6417 switch (op) 6418 { 6419 case 0x0F: 6420 if ((c.Iop & 0xFFFD00) == 0x0F3800) 6421 { // 3 byte op ( 0F38-- or 0F3A-- ) 6422 ins = inssize2[(c.Iop >> 8) & 0xFF]; 6423 size = ins & 7; 6424 if (c.Iop & 0xFF000000) 6425 size++; 6426 } 6427 else 6428 { // 2 byte op ( 0F-- ) 6429 ins = inssize2[c.Iop & 0xFF]; 6430 size = ins & 7; 6431 if (c.Iop & 0xFF0000) 6432 size++; 6433 } 6434 break; 6435 6436 case 0x90: 6437 size = (c.Iop == PAUSE) ? 2 : 1; 6438 goto Lret2; 6439 6440 case NOP: 6441 case ESCAPE: 6442 size = 0; // since these won't be output 6443 goto Lret2; 6444 6445 case ASM: 6446 if (c.Iflags == CFaddrsize) // kludge for DA inline asm 6447 size = _tysize[TYnptr]; 6448 else 6449 size = cast(uint)c.IEV1.len; 6450 goto Lret2; 6451 6452 case 0xA1: 6453 case 0xA3: 6454 if (c.Irex) 6455 { 6456 size = 9; // 64 bit immediate value for MOV to/from RAX 6457 goto Lret; 6458 } 6459 goto Ldefault; 6460 6461 case 0xF6: /* TEST mem8,immed8 */ 6462 ins = inssize[op]; 6463 size = ins & 7; 6464 if (i32) 6465 size = inssize32[op]; 6466 if ((c.Irm & (7<<3)) == 0) 6467 size++; /* size of immed8 */ 6468 break; 6469 6470 case 0xF7: 6471 ins = inssize[op]; 6472 size = ins & 7; 6473 if (i32) 6474 size = inssize32[op]; 6475 if ((c.Irm & (7<<3)) == 0) 6476 size += (i32 ^ ((iflags & CFopsize) !=0)) ? 4 : 2; 6477 break; 6478 6479 default: 6480 Ldefault: 6481 ins = inssize[op]; 6482 size = ins & 7; 6483 if (i32) 6484 size = inssize32[op]; 6485 } 6486 6487 if (iflags & (CFwait | CFopsize | CFaddrsize | CFSEG)) 6488 { 6489 if (iflags & CFwait) // if add FWAIT prefix 6490 size++; 6491 if (iflags & CFSEG) // if segment override 6492 size++; 6493 6494 // If the instruction has a second operand that is not an 8 bit, 6495 // and the operand size prefix is present, then fix the size computation 6496 // because the operand size will be different. 6497 // Walter, I had problems with this bit at the end. There can still be 6498 // an ADDRSIZE prefix for these and it does indeed change the operand size. 6499 6500 if (iflags & (CFopsize | CFaddrsize)) 6501 { 6502 if ((ins & (T|E)) == T) 6503 { 6504 if ((op & 0xAC) == 0xA0) 6505 { 6506 if (iflags & CFaddrsize && !I64) 6507 { if (I32) 6508 size -= 2; 6509 else 6510 size += 2; 6511 } 6512 } 6513 else if (iflags & CFopsize) 6514 { if (I16) 6515 size += 2; 6516 else 6517 size -= 2; 6518 } 6519 } 6520 if (iflags & CFaddrsize) 6521 { if (!I64) 6522 a32 ^= 1; 6523 size++; 6524 } 6525 if (iflags & CFopsize) 6526 size++; /* +1 for OPSIZE prefix */ 6527 } 6528 } 6529 6530 Lmodrm: 6531 if ((op & ~0x0F) == 0x70) 6532 { 6533 if (iflags & CFjmp16) // if long branch 6534 size += I16 ? 
3 : 4; // + 3(4) bytes for JMP 6535 } 6536 else if (ins & M) // if modregrm byte 6537 { 6538 rm = c.Irm; 6539 mod = rm & 0xC0; 6540 if (a32 || I64) 6541 { // 32 bit addressing 6542 if (issib(rm)) 6543 size++; 6544 switch (mod) 6545 { case 0: 6546 if (issib(rm) && (c.Isib & 7) == 5 || 6547 (rm & 7) == 5) 6548 size += 4; /* disp32 */ 6549 if (c.Irex & REX_B && (rm & 7) == 5) 6550 /* Instead of selecting R13, this mode is an [RIP] relative 6551 * address. Although valid, it's redundant, and should not 6552 * be generated. Instead, generate 0[R13] instead of [R13]. 6553 */ 6554 assert(0); 6555 break; 6556 6557 case 0x40: 6558 size++; /* disp8 */ 6559 break; 6560 6561 case 0x80: 6562 size += 4; /* disp32 */ 6563 break; 6564 6565 default: 6566 break; 6567 } 6568 } 6569 else 6570 { // 16 bit addressing 6571 if (mod == 0x40) /* 01: 8 bit displacement */ 6572 size++; 6573 else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6)) 6574 size += 2; 6575 } 6576 } 6577 6578 Lret: 6579 if (!(iflags & CFvex) && c.Irex) 6580 { 6581 size++; 6582 if (c.Irex & REX_W && (op & ~7) == 0xB8) 6583 size += 4; 6584 } 6585 Lret2: 6586 //printf("op = x%02x, size = %d\n",op,size); 6587 return size; 6588 } 6589 6590 /******************************** 6591 * Return !=0 if codes match. 6592 */ 6593 6594 static if (0) 6595 { 6596 6597 int code_match(code *c1,code *c2) 6598 { 6599 code cs1,cs2; 6600 ubyte ins; 6601 6602 if (c1 == c2) 6603 goto match; 6604 cs1 = *c1; 6605 cs2 = *c2; 6606 if (cs1.Iop != cs2.Iop) 6607 goto nomatch; 6608 switch (cs1.Iop) 6609 { 6610 case ESCAPE | ESCctor: 6611 case ESCAPE | ESCdtor: 6612 goto nomatch; 6613 6614 case NOP: 6615 goto match; 6616 6617 case ASM: 6618 if (cs1.IEV1.len == cs2.IEV1.len && 6619 memcmp(cs1.IEV1.bytes,cs2.IEV1.bytes,cs1.EV1.len) == 0) 6620 goto match; 6621 else 6622 goto nomatch; 6623 6624 default: 6625 if ((cs1.Iop & 0xFF) == ESCAPE) 6626 goto match; 6627 break; 6628 } 6629 if (cs1.Iflags != cs2.Iflags) 6630 goto nomatch; 6631 6632 ins = inssize[cs1.Iop & 0xFF]; 6633 if ((cs1.Iop & 0xFFFD00) == 0x0F3800) 6634 { 6635 ins = inssize2[(cs1.Iop >> 8) & 0xFF]; 6636 } 6637 else if ((cs1.Iop & 0xFF00) == 0x0F00) 6638 { 6639 ins = inssize2[cs1.Iop & 0xFF]; 6640 } 6641 6642 if (ins & M) // if modregrm byte 6643 { 6644 if (cs1.Irm != cs2.Irm) 6645 goto nomatch; 6646 if ((cs1.Irm & 0xC0) == 0xC0) 6647 goto do2; 6648 if (is32bitaddr(I32,cs1.Iflags)) 6649 { 6650 if (issib(cs1.Irm) && cs1.Isib != cs2.Isib) 6651 goto nomatch; 6652 if ( 6653 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5)) 6654 ) 6655 goto do2; /* if no first operand */ 6656 } 6657 else 6658 { 6659 if ( 6660 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 6661 ) 6662 goto do2; /* if no first operand */ 6663 } 6664 if (cs1.IFL1 != cs2.IFL1) 6665 goto nomatch; 6666 if (flinsymtab[cs1.IFL1] && cs1.IEV1.Vsym != cs2.IEV1.Vsym) 6667 goto nomatch; 6668 if (cs1.IEV1.Voffset != cs2.IEV1.Voffset) 6669 goto nomatch; 6670 } 6671 6672 do2: 6673 if (!(ins & T)) // if no second operand 6674 goto match; 6675 if (cs1.IFL2 != cs2.IFL2) 6676 goto nomatch; 6677 if (flinsymtab[cs1.IFL2] && cs1.IEV2.Vsym != cs2.IEV2.Vsym) 6678 goto nomatch; 6679 if (cs1.IEV2.Voffset != cs2.IEV2.Voffset) 6680 goto nomatch; 6681 6682 match: 6683 return 1; 6684 6685 nomatch: 6686 return 0; 6687 } 6688 6689 } 6690 6691 /************************** 6692 * Write code to intermediate file. 6693 * Code starts at offset. 
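 * Output is buffered through the MiniCodeBuf helper declared below; an
 * illustrative (not load-bearing) use of it would be:
 *      auto buf = MiniCodeBuf(seg);
 *      buf.gen(cast(char)0x90);    // queue one byte
 *      buf.flush();                // emit queued bytes via objmod.bytes()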
6694 * Returns: 6695 * addr of end of code 6696 */ 6697 6698 private struct MiniCodeBuf 6699 { 6700 nothrow: 6701 size_t index; 6702 size_t offset; 6703 int seg; 6704 char[100] bytes; // = void; 6705 6706 this(int seg) 6707 { 6708 index = 0; 6709 this.offset = cast(size_t)Offset(seg); 6710 this.seg = seg; 6711 } 6712 6713 void flushx() 6714 { 6715 // Emit accumulated bytes to code segment 6716 debug assert(index < bytes.length); 6717 offset += objmod.bytes(seg, offset, cast(uint)index, bytes.ptr); 6718 index = 0; 6719 } 6720 6721 void gen(char c) { bytes[index++] = c; } 6722 6723 void genp(size_t n, void *p) { memcpy(&bytes[index], p, n); index += n; } 6724 6725 void flush() { if (index) flushx(); } 6726 6727 uint getOffset() { return cast(uint)(offset + index); } 6728 6729 uint available() { return cast(uint)(bytes.sizeof - index); } 6730 } 6731 6732 private void do8bit(MiniCodeBuf *pbuf, FL, evc *); 6733 private void do16bit(MiniCodeBuf *pbuf, FL, evc *,int); 6734 private void do32bit(MiniCodeBuf *pbuf, FL, evc *,int,int = 0); 6735 private void do64bit(MiniCodeBuf *pbuf, FL, evc *,int); 6736 6737 uint codout(int seg, code *c) 6738 { 6739 ubyte rm,mod; 6740 ubyte ins; 6741 code *cn; 6742 uint flags; 6743 Symbol *s; 6744 6745 debug 6746 if (debugc) printf("codout(%p), Coffset = x%llx\n",c,cast(ulong)Offset(seg)); 6747 6748 MiniCodeBuf ggen = void; 6749 ggen.index = 0; 6750 ggen.offset = cast(size_t)Offset(seg); 6751 ggen.seg = seg; 6752 6753 for (; c; c = code_next(c)) 6754 { 6755 debug 6756 { 6757 if (debugc) { printf("off=%02x, sz=%d, ",cast(int)ggen.getOffset(),cast(int)calccodsize(c)); code_print(c); } 6758 uint startoffset = ggen.getOffset(); 6759 } 6760 6761 opcode_t op = c.Iop; 6762 ins = inssize[op & 0xFF]; 6763 switch (op & 0xFF) 6764 { 6765 case ESCAPE: 6766 /* Check for SSE4 opcode v/pmaxuw xmm1,xmm2/m128 */ 6767 if(op == 0x660F383E || c.Iflags & CFvex) break; 6768 6769 switch (op & 0xFFFF00) 6770 { case ESClinnum: 6771 /* put out line number stuff */ 6772 objmod.linnum(c.IEV1.Vsrcpos,seg,ggen.getOffset()); 6773 break; 6774 version (SCPP) 6775 { 6776 static if (1) 6777 { 6778 case ESCctor: 6779 case ESCdtor: 6780 case ESCoffset: 6781 if (config.exe != EX_WIN32) 6782 except_pair_setoffset(c,ggen.getOffset() - funcoffset); 6783 break; 6784 6785 case ESCmark: 6786 case ESCrelease: 6787 case ESCmark2: 6788 case ESCrelease2: 6789 break; 6790 } 6791 else 6792 { 6793 case ESCctor: 6794 except_push(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null); 6795 break; 6796 6797 case ESCdtor: 6798 except_pop(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null); 6799 break; 6800 6801 case ESCmark: 6802 except_mark(); 6803 break; 6804 6805 case ESCrelease: 6806 except_release(); 6807 break; 6808 } 6809 } 6810 case ESCadjesp: 6811 //printf("adjust ESP %ld\n", (long)c.IEV1.Vint); 6812 break; 6813 6814 default: 6815 break; 6816 } 6817 6818 debug 6819 assert(calccodsize(c) == 0); 6820 6821 continue; 6822 6823 case NOP: /* don't send them out */ 6824 if (op != NOP) 6825 break; 6826 debug 6827 assert(calccodsize(c) == 0); 6828 6829 continue; 6830 6831 case ASM: 6832 if (op != ASM) 6833 break; 6834 ggen.flush(); 6835 if (c.Iflags == CFaddrsize) // kludge for DA inline asm 6836 { 6837 do32bit(&ggen, FLblockoff,&c.IEV1,0); 6838 } 6839 else 6840 { 6841 ggen.offset += objmod.bytes(seg,ggen.offset,cast(uint)c.IEV1.len,c.IEV1.bytes); 6842 } 6843 debug 6844 assert(calccodsize(c) == c.IEV1.len); 6845 6846 continue; 6847 6848 default: 6849 break; 6850 } 6851 flags = c.Iflags; 6852 6853 // See if we need to flush (don't 
have room for largest code sequence) 6854 if (ggen.available() < (1+4+4+8+8)) 6855 ggen.flush(); 6856 6857 // see if we need to put out prefix bytes 6858 if (flags & (CFwait | CFPREFIX | CFjmp16)) 6859 { 6860 int override_; 6861 6862 if (flags & CFwait) 6863 ggen.gen(0x9B); // FWAIT 6864 /* ? SEGES : SEGSS */ 6865 switch (flags & CFSEG) 6866 { case CFes: override_ = SEGES; goto segover; 6867 case CFss: override_ = SEGSS; goto segover; 6868 case CFcs: override_ = SEGCS; goto segover; 6869 case CFds: override_ = SEGDS; goto segover; 6870 case CFfs: override_ = SEGFS; goto segover; 6871 case CFgs: override_ = SEGGS; goto segover; 6872 segover: ggen.gen(cast(ubyte)override_); 6873 break; 6874 6875 default: break; 6876 } 6877 6878 if (flags & CFaddrsize) 6879 ggen.gen(0x67); 6880 6881 // Do this last because of instructions like ADDPD 6882 if (flags & CFopsize) 6883 ggen.gen(0x66); /* operand size */ 6884 6885 if ((op & ~0x0F) == 0x70 && flags & CFjmp16) /* long condit jmp */ 6886 { 6887 if (!I16) 6888 { // Put out 16 bit conditional jump 6889 c.Iop = op = 0x0F00 | (0x80 | (op & 0x0F)); 6890 } 6891 else 6892 { 6893 cn = code_calloc(); 6894 /*cxcalloc++;*/ 6895 cn.next = code_next(c); 6896 c.next= cn; // link into code 6897 cn.Iop = JMP; // JMP block 6898 cn.IFL2 = c.IFL2; 6899 cn.IEV2.Vblock = c.IEV2.Vblock; 6900 c.Iop = op ^= 1; // toggle condition 6901 c.IFL2 = FLconst; 6902 c.IEV2.Vpointer = I16 ? 3 : 5; // skip over JMP block 6903 c.Iflags &= ~CFjmp16; 6904 } 6905 } 6906 } 6907 6908 if (flags & CFvex) 6909 { 6910 if (flags & CFvex3) 6911 { 6912 ggen.gen(0xC4); 6913 ggen.gen(cast(ubyte)VEX3_B1(c.Ivex)); 6914 ggen.gen(cast(ubyte)VEX3_B2(c.Ivex)); 6915 ggen.gen(c.Ivex.op); 6916 } 6917 else 6918 { 6919 ggen.gen(0xC5); 6920 ggen.gen(cast(ubyte)VEX2_B1(c.Ivex)); 6921 ggen.gen(c.Ivex.op); 6922 } 6923 ins = vex_inssize(c); 6924 goto Lmodrm; 6925 } 6926 6927 if (op > 0xFF) 6928 { 6929 if ((op & 0xFFFD00) == 0x0F3800) 6930 ins = inssize2[(op >> 8) & 0xFF]; 6931 else if ((op & 0xFF00) == 0x0F00) 6932 ins = inssize2[op & 0xFF]; 6933 6934 if (op & 0xFF000000) 6935 { 6936 ubyte op1 = op >> 24; 6937 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66) 6938 { 6939 ggen.gen(op1); 6940 if (c.Irex) 6941 ggen.gen(c.Irex | REX); 6942 } 6943 else 6944 { 6945 if (c.Irex) 6946 ggen.gen(c.Irex | REX); 6947 ggen.gen(op1); 6948 } 6949 ggen.gen((op >> 16) & 0xFF); 6950 ggen.gen((op >> 8) & 0xFF); 6951 ggen.gen(op & 0xFF); 6952 } 6953 else if (op & 0xFF0000) 6954 { 6955 ubyte op1 = cast(ubyte)(op >> 16); 6956 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66) 6957 { 6958 ggen.gen(op1); 6959 if (c.Irex) 6960 ggen.gen(c.Irex | REX); 6961 } 6962 else 6963 { 6964 if (c.Irex) 6965 ggen.gen(c.Irex | REX); 6966 ggen.gen(op1); 6967 } 6968 ggen.gen((op >> 8) & 0xFF); 6969 ggen.gen(op & 0xFF); 6970 } 6971 else 6972 { 6973 if (c.Irex) 6974 ggen.gen(c.Irex | REX); 6975 ggen.gen((op >> 8) & 0xFF); 6976 ggen.gen(op & 0xFF); 6977 } 6978 } 6979 else 6980 { 6981 if (c.Irex) 6982 ggen.gen(c.Irex | REX); 6983 ggen.gen(cast(ubyte)op); 6984 } 6985 Lmodrm: 6986 if (ins & M) /* if modregrm byte */ 6987 { 6988 rm = c.Irm; 6989 ggen.gen(rm); 6990 6991 // Look for an address size override when working with the 6992 // MOD R/M and SIB bytes 6993 6994 if (is32bitaddr( I32, flags)) 6995 { 6996 if (issib(rm)) 6997 ggen.gen(c.Isib); 6998 switch (rm & 0xC0) 6999 { 7000 case 0x40: 7001 do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1); // 8 bit 7002 break; 7003 7004 case 0: 7005 if (!(issib(rm) && (c.Isib & 7) == 5 || 7006 (rm & 7) == 5)) 7007 break; 7008 goto case 0x80; 
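                        /* In 32/64-bit addressing the mod bits of the ModRM byte select
                         * the displacement that follows the ModRM/SIB bytes:
                         *   mod == 00: no displacement, except rm == 101 (or a SIB byte
                         *              whose base field is 101), which takes a disp32
                         *   mod == 01: disp8  (case 0x40 above)
                         *   mod == 10: disp32 (case 0x80 below)
                         * mirroring the size bookkeeping in calccodsize().
                         */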
7009 7010 case 0x80: 7011 { 7012 int cfflags = CFoff; 7013 targ_size_t val = 0; 7014 if (I64) 7015 { 7016 if ((rm & modregrm(3,0,7)) == modregrm(0,0,5)) // if disp32[RIP] 7017 { 7018 cfflags |= CFpc32; 7019 val = -4; 7020 reg_t reg = rm & modregrm(0,7,0); 7021 if (ins & T || 7022 ((op == 0xF6 || op == 0xF7) && (reg == modregrm(0,0,0) || reg == modregrm(0,1,0)))) 7023 { if (ins & E || op == 0xF6) 7024 val = -5; 7025 else if (c.Iflags & CFopsize) 7026 val = -6; 7027 else 7028 val = -8; 7029 } 7030 7031 if (config.exe & (EX_OSX64 | EX_WIN64)) 7032 /* Mach-O and Win64 fixups already take the 4 byte size 7033 * into account, so bias by 4 7034 */ 7035 val += 4; 7036 } 7037 } 7038 do32bit(&ggen, cast(FL)c.IFL1,&c.IEV1,cfflags,cast(int)val); 7039 break; 7040 } 7041 7042 default: 7043 break; 7044 } 7045 } 7046 else 7047 { 7048 switch (rm & 0xC0) 7049 { case 0x40: 7050 do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1); // 8 bit 7051 break; 7052 7053 case 0: 7054 if ((rm & 7) != 6) 7055 break; 7056 goto case 0x80; 7057 7058 case 0x80: 7059 do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,CFoff); 7060 break; 7061 7062 default: 7063 break; 7064 } 7065 } 7066 } 7067 else 7068 { 7069 if (op == ENTER) 7070 do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,0); 7071 } 7072 flags &= CFseg | CFoff | CFselfrel; 7073 if (ins & T) /* if second operand */ 7074 { 7075 if (ins & E) /* if data-8 */ 7076 do8bit(&ggen, cast(FL) c.IFL2,&c.IEV2); 7077 else if (!I16) 7078 { 7079 switch (op) 7080 { 7081 case 0xC2: /* RETN imm16 */ 7082 case 0xCA: /* RETF imm16 */ 7083 do16: 7084 do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 7085 break; 7086 7087 case 0xA1: 7088 case 0xA3: 7089 if (I64 && c.Irex) 7090 { 7091 do64: 7092 do64bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 7093 break; 7094 } 7095 goto case 0xA0; 7096 7097 case 0xA0: /* MOV AL,byte ptr [] */ 7098 case 0xA2: 7099 if (c.Iflags & CFaddrsize && !I64) 7100 goto do16; 7101 else 7102 do32: 7103 do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 7104 break; 7105 7106 case 0x9A: 7107 case 0xEA: 7108 if (c.Iflags & CFopsize) 7109 goto ptr1616; 7110 else 7111 goto ptr1632; 7112 7113 case 0x68: // PUSH immed32 7114 if (cast(FL)c.IFL2 == FLblock) 7115 { 7116 c.IFL2 = FLblockoff; 7117 goto do32; 7118 } 7119 else 7120 goto case_default; 7121 7122 case CALL: // CALL rel 7123 case JMP: // JMP rel 7124 flags |= CFselfrel; 7125 goto case_default; 7126 7127 default: 7128 if ((op|0xF) == 0x0F8F) // Jcc rel16 rel32 7129 flags |= CFselfrel; 7130 if (I64 && (op & ~7) == 0xB8 && c.Irex & REX_W) 7131 goto do64; 7132 case_default: 7133 if (c.Iflags & CFopsize) 7134 goto do16; 7135 else 7136 goto do32; 7137 } 7138 } 7139 else 7140 { 7141 switch (op) 7142 { 7143 case 0xC2: 7144 case 0xCA: 7145 goto do16; 7146 7147 case 0xA0: 7148 case 0xA1: 7149 case 0xA2: 7150 case 0xA3: 7151 if (c.Iflags & CFaddrsize) 7152 goto do32; 7153 else 7154 goto do16; 7155 7156 case 0x9A: 7157 case 0xEA: 7158 if (c.Iflags & CFopsize) 7159 goto ptr1632; 7160 else 7161 goto ptr1616; 7162 7163 ptr1616: 7164 ptr1632: 7165 //assert(c.IFL2 == FLfunc); 7166 ggen.flush(); 7167 if (c.IFL2 == FLdatseg) 7168 { 7169 objmod.reftodatseg(seg,ggen.offset,c.IEV2.Vpointer, 7170 c.IEV2.Vseg,flags); 7171 ggen.offset += 4; 7172 } 7173 else 7174 { 7175 s = c.IEV2.Vsym; 7176 ggen.offset += objmod.reftoident(seg,ggen.offset,s,0,flags); 7177 } 7178 break; 7179 7180 case 0x68: // PUSH immed16 7181 if (cast(FL)c.IFL2 == FLblock) 7182 { c.IFL2 = FLblockoff; 7183 goto do16; 7184 } 7185 else 7186 goto case_default16; 7187 7188 case CALL: 7189 case JMP: 7190 flags |= CFselfrel; 7191 
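                            // CALL rel16 / JMP rel16 operands are IP-relative, so mark
                            // the fixup CFselfrel before taking the generic 16/32-bit path.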
goto default; 7192 7193 default: 7194 case_default16: 7195 if (c.Iflags & CFopsize) 7196 goto do32; 7197 else 7198 goto do16; 7199 } 7200 } 7201 } 7202 else if (op == 0xF6) /* TEST mem8,immed8 */ 7203 { 7204 if ((rm & (7<<3)) == 0) 7205 do8bit(&ggen, cast(FL)c.IFL2,&c.IEV2); 7206 } 7207 else if (op == 0xF7) 7208 { 7209 if ((rm & (7<<3)) == 0) /* TEST mem16/32,immed16/32 */ 7210 { 7211 if ((I32 || I64) ^ ((c.Iflags & CFopsize) != 0)) 7212 do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 7213 else 7214 do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 7215 } 7216 } 7217 7218 debug 7219 if (ggen.getOffset() - startoffset != calccodsize(c)) 7220 { 7221 printf("actual: %d, calc: %d\n", cast(int)(ggen.getOffset() - startoffset), cast(int)calccodsize(c)); 7222 code_print(c); 7223 assert(0); 7224 } 7225 } 7226 ggen.flush(); 7227 Offset(seg) = ggen.offset; 7228 //printf("-codout(), Coffset = x%x\n", Offset(seg)); 7229 return cast(uint)ggen.offset; /* ending address */ 7230 } 7231 7232 7233 private void do64bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags) 7234 { 7235 char *p; 7236 Symbol *s; 7237 targ_size_t ad; 7238 7239 assert(I64); 7240 switch (fl) 7241 { 7242 case FLconst: 7243 ad = *cast(targ_size_t *) uev; 7244 L1: 7245 pbuf.genp(8,&ad); 7246 return; 7247 7248 case FLdatseg: 7249 pbuf.flush(); 7250 objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,CFoffset64 | flags); 7251 break; 7252 7253 case FLframehandler: 7254 framehandleroffset = pbuf.getOffset(); 7255 ad = 0; 7256 goto L1; 7257 7258 case FLswitch: 7259 pbuf.flush(); 7260 ad = uev.Vswitch.Btableoffset; 7261 if (config.flags & CFGromable) 7262 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7263 else 7264 objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff); 7265 break; 7266 7267 case FLcsdata: 7268 case FLfardata: 7269 //symbol_print(uev.Vsym); 7270 // NOTE: In ELFOBJ all symbol refs have been tagged FLextern 7271 // strings and statics are treated like offsets from a 7272 // un-named external with is the start of .rodata or .data 7273 case FLextern: /* external data symbol */ 7274 case FLtlsdata: 7275 pbuf.flush(); 7276 s = uev.Vsym; /* symbol pointer */ 7277 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags); 7278 break; 7279 7280 case FLgotoff: 7281 if (config.exe & (EX_OSX | EX_OSX64)) 7282 { 7283 assert(0); 7284 } 7285 else if (config.exe & EX_posix) 7286 { 7287 pbuf.flush(); 7288 s = uev.Vsym; /* symbol pointer */ 7289 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags); 7290 break; 7291 } 7292 else 7293 assert(0); 7294 7295 case FLgot: 7296 if (config.exe & (EX_OSX | EX_OSX64)) 7297 { 7298 funcsym_p.Slocalgotoffset = pbuf.getOffset(); 7299 ad = 0; 7300 goto L1; 7301 } 7302 else if (config.exe & EX_posix) 7303 { 7304 pbuf.flush(); 7305 s = uev.Vsym; /* symbol pointer */ 7306 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags); 7307 break; 7308 } 7309 else 7310 assert(0); 7311 7312 case FLfunc: /* function call */ 7313 s = uev.Vsym; /* symbol pointer */ 7314 assert(TARGET_SEGMENTED || !tyfarfunc(s.ty())); 7315 pbuf.flush(); 7316 objmod.reftoident(pbuf.seg,pbuf.offset,s,0,CFoffset64 | flags); 7317 break; 7318 7319 case FLblock: /* displacement to another block */ 7320 ad = uev.Vblock.Boffset - pbuf.getOffset() - 4; 7321 //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad); 7322 goto L1; 7323 7324 case FLblockoff: 7325 pbuf.flush(); 7326 
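            // FLblockoff: record a code-segment reference to the target block's
            // Boffset; the buffer was flushed above so pbuf.offset is the real
            // location of the reference handed to the object module.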
            assert(uev.Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset);
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            break;

        default:
            WRFL(fl);
            assert(0);
    }
    pbuf.offset += 8;
}


private void do32bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags, int val)
{
    char *p;
    Symbol *s;
    targ_size_t ad;

    //printf("do32bit(flags = x%x)\n", flags);
    switch (fl)
    {
        case FLconst:
            assert(targ_size_t.sizeof == 4 || targ_size_t.sizeof == 8);
            ad = * cast(targ_size_t *) uev;
        L1:
            pbuf.genp(4,&ad);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags);
            break;

        case FLframehandler:
            framehandleroffset = pbuf.getOffset();
            ad = 0;
            goto L1;

        case FLswitch:
            pbuf.flush();
            ad = uev.Vswitch.Btableoffset;
            if (config.flags & CFGromable)
            {
                if (config.exe & (EX_OSX | EX_OSX64))
                {
                    // These are magic values based on the exact code generated for the switch jump
                    if (I64)
                        uev.Vswitch.Btablebase = pbuf.getOffset() + 4;
                    else
                        uev.Vswitch.Btablebase = pbuf.getOffset() + 4 - 8;
                    ad -= uev.Vswitch.Btablebase;
                    goto L1;
                }
                else if (config.exe & EX_windos)
                {
                    if (I64)
                    {
                        uev.Vswitch.Btablebase = pbuf.getOffset() + 4;
                        ad -= uev.Vswitch.Btablebase;
                        goto L1;
                    }
                    else
                        objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
                }
                else
                {
                    objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
                }
            }
            else
                objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff);
            break;

        case FLcode:
            //assert(JMPJMPTABLE);      // the only use case
            pbuf.flush();
            ad = *cast(targ_size_t *) uev + pbuf.getOffset();
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
            break;

        case FLcsdata:
        case FLfardata:
            //symbol_print(uev.Vsym);

            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from an
            // unnamed external which is the start of .rodata or .data
        case FLextern:                  /* external data symbol */
        case FLtlsdata:
            pbuf.flush();
            s = uev.Vsym;               /* symbol pointer */
            if (config.exe & EX_windos && I64 && (flags & CFpc32))
            {
                /* This is for those funky fixups where the location to be fixed up
                 * is a 'val' amount back from the current RIP, biased by adding 4.
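                 * (With that bias a plain disp32 with no trailing immediate arrives
                 * here as val == 0, an 8-bit immediate as -1, a 16-bit immediate as
                 * -2 and a 32-bit immediate as -4, all within the range the assert
                 * below allows.)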
7422 */ 7423 assert(val >= -5 && val <= 0); 7424 flags |= (-val & 7) << 24; // set CFREL value 7425 assert(CFREL == (7 << 24)); 7426 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags); 7427 } 7428 else 7429 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags); 7430 break; 7431 7432 case FLgotoff: 7433 if (config.exe & (EX_OSX | EX_OSX64)) 7434 { 7435 assert(0); 7436 } 7437 else if (config.exe & EX_posix) 7438 { 7439 pbuf.flush(); 7440 s = uev.Vsym; /* symbol pointer */ 7441 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags); 7442 break; 7443 } 7444 else 7445 assert(0); 7446 7447 case FLgot: 7448 if (config.exe & (EX_OSX | EX_OSX64)) 7449 { 7450 funcsym_p.Slocalgotoffset = pbuf.getOffset(); 7451 ad = 0; 7452 goto L1; 7453 } 7454 else if (config.exe & EX_posix) 7455 { 7456 pbuf.flush(); 7457 s = uev.Vsym; /* symbol pointer */ 7458 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags); 7459 break; 7460 } 7461 else 7462 assert(0); 7463 7464 case FLfunc: /* function call */ 7465 s = uev.Vsym; /* symbol pointer */ 7466 if (tyfarfunc(s.ty())) 7467 { /* Large code references are always absolute */ 7468 pbuf.flush(); 7469 pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 4; 7470 } 7471 else if (s.Sseg == pbuf.seg && 7472 (s.Sclass == SCstatic || s.Sclass == SCglobal) && 7473 s.Sxtrnnum == 0 && flags & CFselfrel) 7474 { /* if we know it's relative address */ 7475 ad = s.Soffset - pbuf.getOffset() - 4; 7476 goto L1; 7477 } 7478 else 7479 { 7480 assert(TARGET_SEGMENTED || !tyfarfunc(s.ty())); 7481 pbuf.flush(); 7482 objmod.reftoident(pbuf.seg,pbuf.offset,s,val,flags); 7483 } 7484 break; 7485 7486 case FLblock: /* displacement to another block */ 7487 ad = uev.Vblock.Boffset - pbuf.getOffset() - 4; 7488 //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad); 7489 goto L1; 7490 7491 case FLblockoff: 7492 pbuf.flush(); 7493 assert(uev.Vblock); 7494 //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset); 7495 objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset); 7496 break; 7497 7498 default: 7499 WRFL(fl); 7500 assert(0); 7501 } 7502 pbuf.offset += 4; 7503 } 7504 7505 7506 private void do16bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags) 7507 { 7508 char *p; 7509 Symbol *s; 7510 targ_size_t ad; 7511 7512 switch (fl) 7513 { 7514 case FLconst: 7515 pbuf.genp(2,cast(char *) uev); 7516 return; 7517 7518 case FLdatseg: 7519 pbuf.flush(); 7520 objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags); 7521 break; 7522 7523 case FLswitch: 7524 pbuf.flush(); 7525 ad = uev.Vswitch.Btableoffset; 7526 if (config.flags & CFGromable) 7527 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7528 else 7529 objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff); 7530 break; 7531 7532 case FLcsdata: 7533 case FLfardata: 7534 case FLextern: /* external data symbol */ 7535 case FLtlsdata: 7536 //assert(SIXTEENBIT || TARGET_SEGMENTED); 7537 pbuf.flush(); 7538 s = uev.Vsym; /* symbol pointer */ 7539 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags); 7540 break; 7541 7542 case FLfunc: /* function call */ 7543 //assert(SIXTEENBIT || TARGET_SEGMENTED); 7544 s = uev.Vsym; /* symbol pointer */ 7545 if (tyfarfunc(s.ty())) 7546 { /* Large code references are always absolute */ 7547 pbuf.flush(); 7548 pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 2; 7549 } 
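            // Self-relative case below: the rel16 is measured from the end of the
            // 2-byte displacement, which is why 2 is subtracted when computing ad.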
7550 else if (s.Sseg == pbuf.seg && 7551 (s.Sclass == SCstatic || s.Sclass == SCglobal) && 7552 s.Sxtrnnum == 0 && flags & CFselfrel) 7553 { /* if we know it's relative address */ 7554 ad = s.Soffset - pbuf.getOffset() - 2; 7555 goto L1; 7556 } 7557 else 7558 { 7559 pbuf.flush(); 7560 objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags); 7561 } 7562 break; 7563 7564 case FLblock: /* displacement to another block */ 7565 ad = uev.Vblock.Boffset - pbuf.getOffset() - 2; 7566 debug 7567 { 7568 targ_ptrdiff_t delta = uev.Vblock.Boffset - pbuf.getOffset() - 2; 7569 assert(cast(short)delta == delta); 7570 } 7571 L1: 7572 pbuf.genp(2,&ad); // displacement 7573 return; 7574 7575 case FLblockoff: 7576 pbuf.flush(); 7577 objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset); 7578 break; 7579 7580 default: 7581 WRFL(fl); 7582 assert(0); 7583 } 7584 pbuf.offset += 2; 7585 } 7586 7587 7588 private void do8bit(MiniCodeBuf *pbuf, FL fl, evc *uev) 7589 { 7590 char c; 7591 targ_ptrdiff_t delta; 7592 7593 switch (fl) 7594 { 7595 case FLconst: 7596 c = cast(char)uev.Vuns; 7597 break; 7598 7599 case FLblock: 7600 delta = uev.Vblock.Boffset - pbuf.getOffset() - 1; 7601 if (cast(byte)delta != delta) 7602 { 7603 version (MARS) 7604 { 7605 if (uev.Vblock.Bsrcpos.Slinnum) 7606 printf("%s(%d): ", uev.Vblock.Bsrcpos.Sfilename, uev.Vblock.Bsrcpos.Slinnum); 7607 } 7608 printf("block displacement of %lld exceeds the maximum offset of -128 to 127.\n", cast(long)delta); 7609 err_exit(); 7610 } 7611 c = cast(char)delta; 7612 debug assert(uev.Vblock.Boffset > pbuf.getOffset() || c != 0x7F); 7613 break; 7614 7615 default: 7616 debug printf("fl = %d\n",fl); 7617 assert(0); 7618 } 7619 pbuf.gen(c); 7620 } 7621 7622 7623 /********************************** 7624 */ 7625 7626 version (SCPP) 7627 { 7628 static if (HYDRATE) 7629 { 7630 void code_hydrate(code **pc) 7631 { 7632 code *c; 7633 ubyte ins,rm; 7634 FL fl; 7635 7636 assert(pc); 7637 while (*pc) 7638 { 7639 c = cast(code *) ph_hydrate(cast(void**)pc); 7640 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 7641 ins = vex_inssize(c); 7642 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 7643 ins = inssize2[(c.Iop >> 8) & 0xFF]; 7644 else if ((c.Iop & 0xFF00) == 0x0F00) 7645 ins = inssize2[c.Iop & 0xFF]; 7646 else 7647 ins = inssize[c.Iop & 0xFF]; 7648 switch (c.Iop) 7649 { 7650 default: 7651 break; 7652 7653 case ESCAPE | ESClinnum: 7654 srcpos_hydrate(&c.IEV1.Vsrcpos); 7655 goto done; 7656 7657 case ESCAPE | ESCctor: 7658 case ESCAPE | ESCdtor: 7659 el_hydrate(&c.IEV1.Vtor); 7660 goto done; 7661 7662 case ASM: 7663 ph_hydrate(cast(void**)&c.IEV1.bytes); 7664 goto done; 7665 } 7666 if (!(ins & M) || 7667 ((rm = c.Irm) & 0xC0) == 0xC0) 7668 goto do2; /* if no first operand */ 7669 if (is32bitaddr(I32,c.Iflags)) 7670 { 7671 7672 if ( 7673 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5)) 7674 ) 7675 goto do2; /* if no first operand */ 7676 } 7677 else 7678 { 7679 if ( 7680 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 7681 ) 7682 goto do2; /* if no first operand */ 7683 } 7684 fl = cast(FL) c.IFL1; 7685 switch (fl) 7686 { 7687 case FLudata: 7688 case FLdata: 7689 case FLreg: 7690 case FLauto: 7691 case FLfast: 7692 case FLbprel: 7693 case FLpara: 7694 case FLcsdata: 7695 case FLfardata: 7696 case FLtlsdata: 7697 case FLfunc: 7698 case FLpseudo: 7699 case FLextern: 7700 assert(flinsymtab[fl]); 7701 symbol_hydrate(&c.IEV1.Vsym); 7702 symbol_debug(c.IEV1.Vsym); 7703 break; 7704 7705 case FLdatseg: 7706 case FLfltreg: 7707 case FLallocatmp: 7708 case FLcs: 7709 case 
FLndp: 7710 case FLoffset: 7711 case FLlocalsize: 7712 case FLconst: 7713 case FLframehandler: 7714 assert(!flinsymtab[fl]); 7715 break; 7716 7717 case FLcode: 7718 ph_hydrate(cast(void**)&c.IEV1.Vcode); 7719 break; 7720 7721 case FLblock: 7722 case FLblockoff: 7723 ph_hydrate(cast(void**)&c.IEV1.Vblock); 7724 break; 7725 version (SCPP) 7726 { 7727 case FLctor: 7728 case FLdtor: 7729 el_hydrate(cast(elem**)&c.IEV1.Vtor); 7730 break; 7731 } 7732 case FLasm: 7733 ph_hydrate(cast(void**)&c.IEV1.bytes); 7734 break; 7735 7736 default: 7737 WRFL(fl); 7738 assert(0); 7739 } 7740 do2: 7741 /* Ignore TEST (F6 and F7) opcodes */ 7742 if (!(ins & T)) 7743 goto done; /* if no second operand */ 7744 7745 fl = cast(FL) c.IFL2; 7746 switch (fl) 7747 { 7748 case FLudata: 7749 case FLdata: 7750 case FLreg: 7751 case FLauto: 7752 case FLfast: 7753 case FLbprel: 7754 case FLpara: 7755 case FLcsdata: 7756 case FLfardata: 7757 case FLtlsdata: 7758 case FLfunc: 7759 case FLpseudo: 7760 case FLextern: 7761 assert(flinsymtab[fl]); 7762 symbol_hydrate(&c.IEV2.Vsym); 7763 symbol_debug(c.IEV2.Vsym); 7764 break; 7765 7766 case FLdatseg: 7767 case FLfltreg: 7768 case FLallocatmp: 7769 case FLcs: 7770 case FLndp: 7771 case FLoffset: 7772 case FLlocalsize: 7773 case FLconst: 7774 case FLframehandler: 7775 assert(!flinsymtab[fl]); 7776 break; 7777 7778 case FLcode: 7779 ph_hydrate(cast(void**)&c.IEV2.Vcode); 7780 break; 7781 7782 case FLblock: 7783 case FLblockoff: 7784 ph_hydrate(cast(void**)&c.IEV2.Vblock); 7785 break; 7786 7787 default: 7788 WRFL(fl); 7789 assert(0); 7790 } 7791 done: 7792 { } 7793 7794 pc = &c.next; 7795 } 7796 } 7797 } 7798 7799 /********************************** 7800 */ 7801 7802 static if (DEHYDRATE) 7803 { 7804 void code_dehydrate(code **pc) 7805 { 7806 code *c; 7807 ubyte ins,rm; 7808 FL fl; 7809 7810 while ((c = *pc) != null) 7811 { 7812 ph_dehydrate(pc); 7813 7814 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 7815 ins = vex_inssize(c); 7816 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 7817 ins = inssize2[(c.Iop >> 8) & 0xFF]; 7818 else if ((c.Iop & 0xFF00) == 0x0F00) 7819 ins = inssize2[c.Iop & 0xFF]; 7820 else 7821 ins = inssize[c.Iop & 0xFF]; 7822 switch (c.Iop) 7823 { 7824 default: 7825 break; 7826 7827 case ESCAPE | ESClinnum: 7828 srcpos_dehydrate(&c.IEV1.Vsrcpos); 7829 goto done; 7830 7831 case ESCAPE | ESCctor: 7832 case ESCAPE | ESCdtor: 7833 el_dehydrate(&c.IEV1.Vtor); 7834 goto done; 7835 7836 case ASM: 7837 ph_dehydrate(&c.IEV1.bytes); 7838 goto done; 7839 } 7840 7841 if (!(ins & M) || 7842 ((rm = c.Irm) & 0xC0) == 0xC0) 7843 goto do2; /* if no first operand */ 7844 if (is32bitaddr(I32,c.Iflags)) 7845 { 7846 7847 if ( 7848 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5)) 7849 ) 7850 goto do2; /* if no first operand */ 7851 } 7852 else 7853 { 7854 if ( 7855 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 7856 ) 7857 goto do2; /* if no first operand */ 7858 } 7859 fl = cast(FL) c.IFL1; 7860 switch (fl) 7861 { 7862 case FLudata: 7863 case FLdata: 7864 case FLreg: 7865 case FLauto: 7866 case FLfast: 7867 case FLbprel: 7868 case FLpara: 7869 case FLcsdata: 7870 case FLfardata: 7871 case FLtlsdata: 7872 case FLfunc: 7873 case FLpseudo: 7874 case FLextern: 7875 assert(flinsymtab[fl]); 7876 symbol_dehydrate(&c.IEV1.Vsym); 7877 break; 7878 7879 case FLdatseg: 7880 case FLfltreg: 7881 case FLallocatmp: 7882 case FLcs: 7883 case FLndp: 7884 case FLoffset: 7885 case FLlocalsize: 7886 case FLconst: 7887 case FLframehandler: 7888 assert(!flinsymtab[fl]); 7889 break; 7890 7891 case 
FLcode: 7892 ph_dehydrate(&c.IEV1.Vcode); 7893 break; 7894 7895 case FLblock: 7896 case FLblockoff: 7897 ph_dehydrate(&c.IEV1.Vblock); 7898 break; 7899 version (SCPP) 7900 { 7901 case FLctor: 7902 case FLdtor: 7903 el_dehydrate(&c.IEV1.Vtor); 7904 break; 7905 } 7906 case FLasm: 7907 ph_dehydrate(&c.IEV1.bytes); 7908 break; 7909 7910 default: 7911 WRFL(fl); 7912 assert(0); 7913 break; 7914 } 7915 do2: 7916 /* Ignore TEST (F6 and F7) opcodes */ 7917 if (!(ins & T)) 7918 goto done; /* if no second operand */ 7919 7920 fl = cast(FL) c.IFL2; 7921 switch (fl) 7922 { 7923 case FLudata: 7924 case FLdata: 7925 case FLreg: 7926 case FLauto: 7927 case FLfast: 7928 case FLbprel: 7929 case FLpara: 7930 case FLcsdata: 7931 case FLfardata: 7932 case FLtlsdata: 7933 case FLfunc: 7934 case FLpseudo: 7935 case FLextern: 7936 assert(flinsymtab[fl]); 7937 symbol_dehydrate(&c.IEV2.Vsym); 7938 break; 7939 7940 case FLdatseg: 7941 case FLfltreg: 7942 case FLallocatmp: 7943 case FLcs: 7944 case FLndp: 7945 case FLoffset: 7946 case FLlocalsize: 7947 case FLconst: 7948 case FLframehandler: 7949 assert(!flinsymtab[fl]); 7950 break; 7951 7952 case FLcode: 7953 ph_dehydrate(&c.IEV2.Vcode); 7954 break; 7955 7956 case FLblock: 7957 case FLblockoff: 7958 ph_dehydrate(&c.IEV2.Vblock); 7959 break; 7960 7961 default: 7962 WRFL(fl); 7963 assert(0); 7964 break; 7965 } 7966 done: 7967 pc = &code_next(c); 7968 } 7969 } 7970 } 7971 } 7972 7973 /*************************** 7974 * Debug code to dump code structure. 7975 */ 7976 7977 void WRcodlst(code *c) 7978 { 7979 for (; c; c = code_next(c)) 7980 code_print(c); 7981 } 7982 7983 extern (C) void code_print(code* c) 7984 { 7985 ubyte ins; 7986 ubyte rexb; 7987 7988 if (c == null) 7989 { 7990 printf("code 0\n"); 7991 return; 7992 } 7993 7994 const op = c.Iop; 7995 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 7996 ins = vex_inssize(c); 7997 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 7998 ins = inssize2[(op >> 8) & 0xFF]; 7999 else if ((c.Iop & 0xFF00) == 0x0F00) 8000 ins = inssize2[op & 0xFF]; 8001 else 8002 ins = inssize[op & 0xFF]; 8003 8004 printf("code %p: nxt=%p ",c,code_next(c)); 8005 8006 if (c.Iflags & CFvex) 8007 { 8008 if (c.Iflags & CFvex3) 8009 { 8010 printf("vex=0xC4"); 8011 printf(" 0x%02X", VEX3_B1(c.Ivex)); 8012 printf(" 0x%02X", VEX3_B2(c.Ivex)); 8013 rexb = 8014 ( c.Ivex.w ? REX_W : 0) | 8015 (!c.Ivex.r ? REX_R : 0) | 8016 (!c.Ivex.x ? REX_X : 0) | 8017 (!c.Ivex.b ? REX_B : 0); 8018 } 8019 else 8020 { 8021 printf("vex=0xC5"); 8022 printf(" 0x%02X", VEX2_B1(c.Ivex)); 8023 rexb = !c.Ivex.r ? 
REX_R : 0; 8024 } 8025 printf(" "); 8026 } 8027 else 8028 rexb = c.Irex; 8029 8030 if (rexb) 8031 { 8032 printf("rex=0x%02X ", c.Irex); 8033 if (rexb & REX_W) 8034 printf("W"); 8035 if (rexb & REX_R) 8036 printf("R"); 8037 if (rexb & REX_X) 8038 printf("X"); 8039 if (rexb & REX_B) 8040 printf("B"); 8041 printf(" "); 8042 } 8043 printf("op=0x%02X",op); 8044 8045 if ((op & 0xFF) == ESCAPE) 8046 { 8047 if ((op & 0xFF00) == ESClinnum) 8048 { 8049 printf(" linnum = %d\n",c.IEV1.Vsrcpos.Slinnum); 8050 return; 8051 } 8052 printf(" ESCAPE %d",c.Iop >> 8); 8053 } 8054 if (c.Iflags) 8055 printf(" flg=%x",c.Iflags); 8056 if (ins & M) 8057 { 8058 uint rm = c.Irm; 8059 printf(" rm=0x%02X=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7); 8060 if (!I16 && issib(rm)) 8061 { 8062 ubyte sib = c.Isib; 8063 printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7); 8064 } 8065 if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40) 8066 { 8067 switch (c.IFL1) 8068 { 8069 case FLconst: 8070 case FLoffset: 8071 printf(" int = %4d",c.IEV1.Vuns); 8072 break; 8073 8074 case FLblock: 8075 printf(" block = %p",c.IEV1.Vblock); 8076 break; 8077 8078 case FLswitch: 8079 case FLblockoff: 8080 case FLlocalsize: 8081 case FLframehandler: 8082 case 0: 8083 break; 8084 8085 case FLdatseg: 8086 printf(" FLdatseg %d.%llx",c.IEV1.Vseg,cast(ulong)c.IEV1.Vpointer); 8087 break; 8088 8089 case FLauto: 8090 case FLfast: 8091 case FLreg: 8092 case FLdata: 8093 case FLudata: 8094 case FLpara: 8095 case FLbprel: 8096 case FLtlsdata: 8097 case FLextern: 8098 printf(" "); 8099 WRFL(cast(FL)c.IFL1); 8100 printf(" sym='%s'",c.IEV1.Vsym.Sident.ptr); 8101 if (c.IEV1.Voffset) 8102 printf(".%d", cast(int)c.IEV1.Voffset); 8103 break; 8104 8105 default: 8106 WRFL(cast(FL)c.IFL1); 8107 break; 8108 } 8109 } 8110 } 8111 if (ins & T) 8112 { 8113 printf(" "); 8114 WRFL(cast(FL)c.IFL2); 8115 switch (c.IFL2) 8116 { 8117 case FLconst: 8118 printf(" int = %4d",c.IEV2.Vuns); 8119 break; 8120 8121 case FLblock: 8122 printf(" block = %p",c.IEV2.Vblock); 8123 break; 8124 8125 case FLswitch: 8126 case FLblockoff: 8127 case 0: 8128 case FLlocalsize: 8129 case FLframehandler: 8130 break; 8131 8132 case FLdatseg: 8133 printf(" %d.%llx",c.IEV2.Vseg,cast(ulong)c.IEV2.Vpointer); 8134 break; 8135 8136 case FLauto: 8137 case FLfast: 8138 case FLreg: 8139 case FLpara: 8140 case FLbprel: 8141 case FLfunc: 8142 case FLdata: 8143 case FLudata: 8144 case FLtlsdata: 8145 printf(" sym='%s'",c.IEV2.Vsym.Sident.ptr); 8146 break; 8147 8148 case FLcode: 8149 printf(" code = %p",c.IEV2.Vcode); 8150 break; 8151 8152 default: 8153 WRFL(cast(FL)c.IFL2); 8154 break; 8155 } 8156 } 8157 printf("\n"); 8158 } 8159 8160 }
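
/**********************************************
 * Illustrative sketch only (never compiled): a standalone helper mirroring the
 * ModRM displacement-size rules that calccodsize() and codout() both apply for
 * 32/64-bit addressing. The function name and its stripped-down parameters are
 * choices made for this example, not part of the backend.
 */
version (none)
{
    uint dispSize32(ubyte rm, ubyte sib)
    {
        switch (rm & 0xC0)
        {
            case 0:
                // mod == 00: no displacement, unless rm == 101 (disp32) or a SIB
                // byte is present (rm == 100) whose base field is 101 (also disp32)
                return ((rm & 7) == 5 || ((rm & 7) == 4 && (sib & 7) == 5)) ? 4 : 0;

            case 0x40:
                return 1;               // mod == 01: disp8

            case 0x80:
                return 4;               // mod == 10: disp32

            default:
                return 0;               // mod == 11: register operand, no displacement
        }
    }

    unittest
    {
        assert(dispSize32(0x05, 0) == 4);    // [disp32]
        assert(dispSize32(0x44, 0x24) == 1); // [ESP + disp8] via SIB
        assert(dispSize32(0xC0, 0) == 0);    // register direct
    }
}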