/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1994-1998 by Symantec
 *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod3.d, backend/cod3.d)
 * Documentation:  https://dlang.org/phobos/dmd_backend_cod3.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod3.d
 */

module dmd.backend.cod3;

// This module is compiled for both the C/C++ front end (SCPP) and
// the D front end (MARS) builds of the backend.
version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.cgcse;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.dlist;
import dmd.backend.dvec;
import dmd.backend.melf;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.outbuf;
import dmd.backend.rtlsym;
import dmd.backend.symtab;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

version (SCPP)
{
    import parser;
    import precomp;
}

extern (C++):

nothrow:

// Compile-time boolean mirroring the MARS version condition, usable in
// ordinary (non-version) boolean expressions.
version (MARS)
    enum MARS = true;
else
    enum MARS = false;

int REGSIZE();

extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl, flinsymtab;

// Convert a register number to its register-mask bit.
private extern (D) uint mask(uint m) { return 1 << m; }

//private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); }

extern __gshared
targ_size_t retsize;

enum JMPJMPTABLE = false;           // benchmarking shows it's slower

enum MINLL = 0x8000_0000_0000_0000L;
enum MAXLL = 0x7FFF_FFFF_FFFF_FFFFL;

/*************
 * Size in bytes of each instruction.
 * 0 means illegal instruction.
 * bit  M:      if there is a modregrm field (EV1 is reserved for modregrm)
 * bit  T:      if there is a second operand (EV2)
 * bit  E:      if second operand is only 8 bits
 * bit  A:      a short version exists for the AX reg
 * bit  R:      a short version exists for regs
 * bits 2..0:   size of instruction (excluding optional bytes)
 */

enum
{
    M = 0x80,
    T = 0x40,
    E = 0x20,
    A = 0x10,
    R = 0x08,
    W = 0,
}

// Size/attribute table for single-byte opcodes, indexed by opcode.
// Some entries are later patched by cod3_set32()/cod3_set64().
private __gshared ubyte[256] inssize =
[       M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 00 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 08 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 10 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 18 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 20 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 28 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 30 */
        M|2,M|2,M|2,M|2,        T|E|2,T|3,1,1,          /* 38 */
        1,1,1,1,                1,1,1,1,                /* 40 */
        1,1,1,1,                1,1,1,1,                /* 48 */
        1,1,1,1,                1,1,1,1,                /* 50 */
        1,1,1,1,                1,1,1,1,                /* 58 */
        1,1,M|2,M|2,            1,1,1,1,                /* 60 */
        T|3,M|T|4,T|E|2,M|T|E|3, 1,1,1,1,               /* 68 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 70 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 78 */
        M|T|E|A|3,M|T|A|4,M|T|E|3,M|T|E|3, M|2,M|2,M|2,M|A|R|2, /* 80 */
        M|A|2,M|A|2,M|A|2,M|A|2, M|2,M|2,M|2,M|R|2,     /* 88 */
        1,1,1,1,                1,1,1,1,                /* 90 */
        1,1,T|5,1,              1,1,1,1,                /* 98 */

        // cod3_set32() patches this
        //T|5,T|5,T|5,T|5,      1,1,1,1,                /* A0 */
        T|3,T|3,T|3,T|3,        1,1,1,1,                /* A0 */

        T|E|2,T|3,1,1,          1,1,1,1,                /* A8 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* B0 */
        T|3,T|3,T|3,T|3,        T|3,T|3,T|3,T|3,        /* B8 */
        M|T|E|3,M|T|E|3,T|3,1,  M|2,M|2,M|T|E|R|3,M|T|R|4, /* C0 */
        T|E|4,1,T|3,1,          1,T|E|2,1,1,            /* C8 */
        M|2,M|2,M|2,M|2,        T|E|2,T|E|2,0,1,        /* D0 */
        /* For the floating instructions, allow room for the FWAIT */
        M|2,M|2,M|2,M|2,        M|2,M|2,M|2,M|2,        /* D8 */
        T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* E0 */
        T|3,T|3,T|5,T|E|2,      1,1,1,1,                /* E8 */
        1,0,1,1,                1,1,M|A|2,M|A|2,        /* F0 */
        1,1,1,1,                1,1,M|2,M|R|2           /* F8 */
];

// Plain byte sizes for single-byte opcodes in 32 bit mode
// (no M/T/E attribute bits, just total instruction length).
private __gshared const ubyte[256] inssize32 =
[       2,2,2,2,        2,5,1,1,                /* 00 */
        2,2,2,2,        2,5,1,1,                /* 08 */
        2,2,2,2,        2,5,1,1,                /* 10 */
        2,2,2,2,        2,5,1,1,                /* 18 */
        2,2,2,2,        2,5,1,1,                /* 20 */
        2,2,2,2,        2,5,1,1,                /* 28 */
        2,2,2,2,        2,5,1,1,                /* 30 */
        2,2,2,2,        2,5,1,1,                /* 38 */
        1,1,1,1,        1,1,1,1,                /* 40 */
        1,1,1,1,        1,1,1,1,                /* 48 */
        1,1,1,1,        1,1,1,1,                /* 50 */
        1,1,1,1,        1,1,1,1,                /* 58 */
        1,1,2,2,        1,1,1,1,                /* 60 */
        5,6,2,3,        1,1,1,1,                /* 68 */
        2,2,2,2,        2,2,2,2,                /* 70 */
        2,2,2,2,        2,2,2,2,                /* 78 */
        3,6,3,3,        2,2,2,2,                /* 80 */
        2,2,2,2,        2,2,2,2,                /* 88 */
        1,1,1,1,        1,1,1,1,                /* 90 */
        1,1,7,1,        1,1,1,1,                /* 98 */
        5,5,5,5,        1,1,1,1,                /* A0 */
        2,5,1,1,        1,1,1,1,                /* A8 */
        2,2,2,2,        2,2,2,2,                /* B0 */
        5,5,5,5,        5,5,5,5,                /* B8 */
        3,3,3,1,        2,2,3,6,                /* C0 */
        4,1,3,1,        1,2,1,1,                /* C8 */
        2,2,2,2,        2,2,0,1,                /* D0 */
        /* For the floating instructions, don't need room for the FWAIT */
        2,2,2,2,        2,2,2,2,                /* D8 */

        2,2,2,2,        2,2,2,2,                /* E0 */
        5,5,7,2,        1,1,1,1,                /* E8 */
        1,0,1,1,        1,1,2,2,                /* F0 */
        1,1,1,1,        1,1,2,2                 /* F8 */
];

/* For 2 byte opcodes starting with 0x0F */
private __gshared ubyte[256] inssize2 =
[       M|3,M|3,M|3,M|3,        2,2,2,2,                // 00
        2,2,M|3,2,              2,M|3,2,M|T|E|4,        // 08
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 10
        M|3,2,2,2,              2,2,2,2,                // 18
        M|3,M|3,M|3,M|3,        M|3,2,M|3,2,            // 20
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 28
        2,2,2,2,                2,2,2,2,                // 30
        M|4,2,M|T|E|5,2,        2,2,2,2,                // 38
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 40
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 48
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 50
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 58
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 60
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 68
        M|T|E|4,M|T|E|4,M|T|E|4,M|T|E|4, M|3,M|3,M|3,2, // 70
        2,2,2,2,                M|3,M|3,M|3,M|3,        // 78
        W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 80
        W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 88
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 90
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // 98
        2,2,2,M|3,              M|T|E|4,M|3,2,2,        // A0
        2,2,2,M|3,              M|T|E|4,M|3,M|3,M|3,    // A8
        M|E|3,M|3,M|3,M|3,      M|3,M|3,M|3,M|3,        // B0
        M|3,2,M|T|E|4,M|3,      M|3,M|3,M|3,M|3,        // B8
        M|3,M|3,M|T|E|4,M|3,    M|T|E|4,M|T|E|4,M|T|E|4,M|3, // C0
        2,2,2,2,                2,2,2,2,                // C8
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // D0
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // D8
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // E0
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // E8
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,M|3,        // F0
        M|3,M|3,M|3,M|3,        M|3,M|3,M|3,2           // F8
];

/*************************************************
 * Generate code to save `reg` in `regsave` stack area.
 * Params:
 *      regsave = register save area on stack
 *      cdb = where to write generated code
 *      reg = register to save
 *      idx = set to location in regsave for use in REGSAVE_restore()
 */

void REGSAVE_save(ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, out uint idx)
{
    if (isXMMreg(reg))
    {
        // XMM registers take a 16 byte, 16-byte-aligned slot in the save area
        regsave.alignment = 16;
        regsave.idx = (regsave.idx + 15) & ~15;
        idx = regsave.idx;
        regsave.idx += 16;
        // MOVD idx[RBP],xmm
        opcode_t op = STOAPD;
        if (TARGET_LINUX && I32)
            // Haven't yet figured out why stack is not aligned to 16
            op = STOUPD;
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLregsave,cast(targ_uns) idx);
    }
    else
    {
        // General purpose registers take a REGSIZE slot
        if (!regsave.alignment)
            regsave.alignment = REGSIZE;
        idx = regsave.idx;
        regsave.idx += REGSIZE;
        // MOV idx[RBP],reg
        cdb.genc1(0x89,modregxrm(2, reg, BPRM),FLregsave,cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
    reflocal = true;
    if (regsave.idx > regsave.top)
        regsave.top = regsave.idx;              // keep high water mark
}

/*******************************
 * Restore `reg` from `regsave` area.
 * Complement REGSAVE_save().
 */

void REGSAVE_restore(const ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, uint idx)
{
    if (isXMMreg(reg))
    {
        assert(regsave.alignment == 16);
        // MOVD xmm,idx[RBP]
        opcode_t op = LODAPD;
        if (TARGET_LINUX && I32)
            // Haven't yet figured out why stack is not aligned to 16
            op = LODUPD;
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLregsave,cast(targ_uns) idx);
    }
    else
    {   // MOV reg,idx[RBP]
        cdb.genc1(0x8B,modregxrm(2, reg, BPRM),FLregsave,cast(targ_uns) idx);
        if (I64)
            code_orrex(cdb.last(), REX_W);
    }
}

/************************************
 * Size for vex encoded instruction.
 */

ubyte vex_inssize(code *c)
{
    assert(c.Iflags & CFvex && c.Ivex.pfx == 0xC4);
    ubyte ins;
    if (c.Iflags & CFvex3)
    {
        // 3-byte VEX prefix: base size comes from the opcode map selected
        // by the mmmm field
        switch (c.Ivex.mmmm)
        {
            case 0: // no prefix
            case 1: // 0F
                ins = cast(ubyte)(inssize2[c.Ivex.op] + 2);
                break;
            case 2: // 0F 38
                ins = cast(ubyte)(inssize2[0x38] + 1);
                break;
            case 3: // 0F 3A
                ins = cast(ubyte)(inssize2[0x3A] + 1);
                break;
            default:
                printf("Iop = %x mmmm = %x\n", c.Iop, c.Ivex.mmmm);
                assert(0);
        }
    }
    else
    {
        // 2-byte VEX prefix
        ins = cast(ubyte)(inssize2[c.Ivex.op] + 1);
    }
    return ins;
}

/************************************
 * Determine if there is a modregrm byte for code.
 */

int cod3_EA(code *c)
{   uint ins;

    opcode_t op1 = c.Iop & 0xFF;
    if (op1 == ESCAPE)
        ins = 0;                                // pseudo-ops have no EA
    else if ((c.Iop & 0xFFFD00) == 0x0F3800)    // 0F 38 / 0F 3A opcode maps
        ins = inssize2[(c.Iop >> 8) & 0xFF];
    else if ((c.Iop & 0xFF00) == 0x0F00)        // 0F opcode map
        ins = inssize2[op1];
    else
        ins = inssize[op1];
    return ins & M;
}

/********************************
 * setup ALLREGS and BYTEREGS
 * called by: codgen
 */

void cod3_initregs()
{
    if (I64)
    {
        ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
        BYTEREGS = ALLREGS;
    }
    else
    {
        ALLREGS = ALLREGS_INIT;
        BYTEREGS = BYTEREGS_INIT;
    }
}

/********************************
 * set initial global variable values
 */

void cod3_setdefault()
{
    fregsaved = mBP | mSI | mDI;
}

/********************************
 * Fix global variables for 386.
 */

void cod3_set32()
{
    // moffs forms of MOV (A0..A3) now carry a 4 byte address
    inssize[0xA0] = T|5;
    inssize[0xA1] = T|5;
    inssize[0xA2] = T|5;
    inssize[0xA3] = T|5;
    BPRM = 5;                       /* [EBP] addressing mode        */
    fregsaved = mBP | mBX | mSI | mDI;      // saved across function calls
    FLOATREGS = FLOATREGS_32;
    FLOATREGS2 = FLOATREGS2_32;
    DOUBLEREGS = DOUBLEREGS_32;
    if (config.flags3 & CFG3eseqds)
        fregsaved |= mES;

    // 0F 80..0F 8F (Jcc rel32) take a 4 byte displacement
    foreach (ref v; inssize2[0x80 .. 0x90])
        v = W|T|6;

    TARGET_STACKALIGN = config.fpxmmregs ? 16 : 4;
}

/********************************
 * Fix global variables for I64.
 */

void cod3_set64()
{
    inssize[0xA0] = T|5;                // MOV AL,mem
    inssize[0xA1] = T|5;                // MOV RAX,mem
    inssize[0xA2] = T|5;                // MOV mem,AL
    inssize[0xA3] = T|5;                // MOV mem,RAX
    BPRM = 5;                           // [RBP] addressing mode

    static if (TARGET_WINDOS)
    {
        fregsaved = mBP | mBX | mDI | mSI | mR12 | mR13 | mR14 | mR15 | mES | mXMM6 | mXMM7; // also XMM8..15;
    }
    else
    {
        fregsaved = mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES;      // saved across function calls
    }
    FLOATREGS = FLOATREGS_64;
    FLOATREGS2 = FLOATREGS2_64;
    DOUBLEREGS = DOUBLEREGS_64;

    ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15;
    BYTEREGS = ALLREGS;

    // 0F 80..0F 8F (Jcc rel32) take a 4 byte displacement
    foreach (ref v; inssize2[0x80 .. 0x90])
        v = W|T|6;

    TARGET_STACKALIGN = config.fpxmmregs ? 16 : 8;
}

/*********************************
 * Word or dword align start of function.
 * Params:
 *      seg = segment to write alignment bytes to
 *      nbytes = number of alignment bytes to write
 */
void cod3_align_bytes(int seg, size_t nbytes)
{
    /* Table 4-2 from Intel Instruction Set Reference M-Z
     * 1 bytes NOP                                        90
     * 2 bytes 66 NOP                                     66 90
     * 3 bytes NOP DWORD ptr [EAX]                        0F 1F 00
     * 4 bytes NOP DWORD ptr [EAX + 00H]                  0F 1F 40 00
     * 5 bytes NOP DWORD ptr [EAX + EAX*1 + 00H]          0F 1F 44 00 00
     * 6 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00H]       66 0F 1F 44 00 00
     * 7 bytes NOP DWORD ptr [EAX + 00000000H]            0F 1F 80 00 00 00 00
     * 8 bytes NOP DWORD ptr [EAX + EAX*1 + 00000000H]    0F 1F 84 00 00 00 00 00
     * 9 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] 66 0F 1F 84 00 00 00 00 00
     * only for CPUs: CPUID.01H.EAX[Bytes 11:8] = 0110B or 1111B
     */

    assert(SegData[seg].SDseg == seg);

    while (nbytes)
    {   size_t n = nbytes;
        const(char)* p;

        if (nbytes > 1 && (I64 || config.fpxmmregs))
        {
            // Use the multi-byte NOP forms above, up to 9 bytes per instruction
            switch (n)
            {
                case 2:  p = "\x66\x90"; break;
                case 3:  p = "\x0F\x1F\x00"; break;
                case 4:  p = "\x0F\x1F\x40\x00"; break;
                case 5:  p = "\x0F\x1F\x44\x00\x00"; break;
                case 6:  p = "\x66\x0F\x1F\x44\x00\x00"; break;
                case 7:  p = "\x0F\x1F\x80\x00\x00\x00\x00"; break;
                case 8:  p = "\x0F\x1F\x84\x00\x00\x00\x00\x00"; break;
                default: p = "\x66\x0F\x1F\x84\x00\x00\x00\x00\x00"; n = 9; break;
            }
        }
        else
        {
            // Fall back to single-byte 0x90 NOPs, at most 15 per write
            static immutable ubyte[15] nops = [
                0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90
            ]; // XCHG AX,AX
            if (n > nops.length)
                n = nops.length;
            p = cast(char*)nops;
        }
        objmod.write_bytes(SegData[seg],cast(uint)n,cast(char*)p);
        nbytes -= n;
    }
}

/****************************
 * Align start of function.
 * Params:
 *      seg = segment of function
 */
void cod3_align(int seg)
{
    uint nbytes;
    static if (TARGET_WINDOS)
    {
        if (config.flags4 & CFG4speed)      // if optimized for speed
        {
            // Pick alignment based on CPU target
            if (config.target_cpu == TARGET_80486 ||
                config.target_cpu >= TARGET_PentiumPro)
            {   // 486 does reads on 16 byte boundaries, so if we are near
                // such a boundary, align us to it

                nbytes = -Offset(seg) & 15;
                if (nbytes < 8)
                    cod3_align_bytes(seg, nbytes);
            }
        }
    }
    else
    {
        // Always align to an 8 byte boundary on non-Windows targets
        nbytes = -Offset(seg) & 7;
        cod3_align_bytes(seg, nbytes);
    }
}


/**********************************
 * Generate code to adjust the stack pointer by `nbytes`
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to adjust stack pointer
 *               (positive allocates, negative releases)
 */
void cod3_stackadj(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackadj(%d)\n", nbytes);
    uint grex = I64 ? REX_W << 16 : 0;
    uint rm;
    if (nbytes > 0)
        rm = modregrm(3,5,SP); // SUB ESP,nbytes
    else
    {
        nbytes = -nbytes;
        rm = modregrm(3,0,SP); // ADD ESP,nbytes
    }
    cdb.genc2(0x81, grex | rm, nbytes);
}

/**********************************
 * Generate code to align the stack pointer at `nbytes`
 * Params:
 *      cdb = code builder
 *      nbytes = number of bytes to align stack pointer
 */
void cod3_stackalign(ref CodeBuilder cdb, int nbytes)
{
    //printf("cod3_stackalign(%d)\n", nbytes);
    const grex = I64 ?
REX_W << 16 : 0;
    const rm = modregrm(3, 4, SP);      // AND ESP,-nbytes
    cdb.genc2(0x81, grex | rm, -nbytes);
}

static if (ELFOBJ)
{
    /* Constructor that links the ModuleReference to the head of
     * the list pointed to by _Dmoduleref
     */
    void cod3_buildmodulector(Outbuffer* buf, int codeOffset, int refOffset)
    {
        /*      ret
         * codeOffset:
         *      pushad
         *      mov     EAX,&ModuleReference
         *      mov     ECX,_DmoduleRef
         *      mov     EDX,[ECX]
         *      mov     [EAX],EDX
         *      mov     [ECX],EAX
         *      popad
         *      ret
         */

        const int seg = CODE;

        if (I64 && config.flags3 & CFG3pic)
        {   // PIC code uses RIP-relative addressing
            // LEA RAX,ModuleReference[RIP]
            buf.writeByte(REX | REX_W);
            buf.writeByte(LEA);
            buf.writeByte(modregrm(0,AX,5));
            codeOffset += 3;
            codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_PC32, 3 /*STI_DATA*/, refOffset - 4);

            // MOV RCX,_DmoduleRef@GOTPCREL[RIP]
            buf.writeByte(REX | REX_W);
            buf.writeByte(0x8B);
            buf.writeByte(modregrm(0,CX,5));
            codeOffset += 3;
            codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_GOTPCREL, Obj.external_def("_Dmodule_ref"), -4);
        }
        else
        {
            /* movl ModuleReference*, %eax */
            buf.writeByte(0xB8);
            codeOffset += 1;
            const uint reltype = I64 ?
R_X86_64_32 : R_386_32;
            codeOffset += Obj.writerel(seg, codeOffset, reltype, 3 /*STI_DATA*/, refOffset);

            /* movl _Dmodule_ref, %ecx */
            buf.writeByte(0xB9);
            codeOffset += 1;
            codeOffset += Obj.writerel(seg, codeOffset, reltype, Obj.external_def("_Dmodule_ref"), 0);
        }

        // Splice this ModuleReference onto the head of the list;
        // the REX_W prefixes widen the moves to 64 bits on I64
        if (I64)
            buf.writeByte(REX | REX_W);
        buf.writeByte(0x8B); buf.writeByte(0x11); /* movl (%ecx), %edx */
        if (I64)
            buf.writeByte(REX | REX_W);
        buf.writeByte(0x89); buf.writeByte(0x10); /* movl %edx, (%eax) */
        if (I64)
            buf.writeByte(REX | REX_W);
        buf.writeByte(0x89); buf.writeByte(0x01); /* movl %eax, (%ecx) */

        buf.writeByte(0xC3); /* ret */
    }

}


/*****************************
 * Given a type, return a mask of
 * registers to hold that type.
 * Input:
 *      tyf     function type
 */

regm_t regmask(tym_t tym, tym_t tyf)
{
    switch (tybasic(tym))
    {
        case TYvoid:
        case TYstruct:
        case TYarray:
            return 0;

        case TYbool:
        case TYwchar_t:
        case TYchar16:
        case TYchar:
        case TYschar:
        case TYuchar:
        case TYshort:
        case TYushort:
        case TYint:
        case TYuint:
        case TYnullptr:
        case TYnptr:
        case TYnref:
        case TYsptr:
        case TYcptr:
        case TYimmutPtr:
        case TYsharePtr:
        case TYrestrictPtr:
        case TYfgPtr:
            return mAX;

        case TYfloat:
        case TYifloat:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            goto case TYlong;

        case TYlong:
        case TYulong:
        case TYdchar:
            if (!I16)
                return mAX;
            goto case TYfptr;

        case TYfptr:
        case TYhptr:
            return mDX | mAX;

        case TYcent:
        case TYucent:
            assert(I64);
            return mDX | mAX;

        case TYvptr:
            return mDX | mBX;

        case TYdouble:
        case TYdouble_alias:
        case TYidouble:
            if (I64)
                return mXMM0;
            if (config.exe & EX_flat)
                return mST0;
            return DOUBLEREGS;

        case
TYllong:
        case TYullong:
            return I64 ? cast(regm_t) mAX : (I32 ? mDX | mAX : DOUBLEREGS);

        case TYldouble:
        case TYildouble:
            return mST0;

        case TYcfloat:
            static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
            {
                if (I32 && tybasic(tyf) == TYnfunc)
                    return mDX | mAX;
            }
            goto case TYcdouble;

        case TYcdouble:
            if (I64)
                return mXMM0 | mXMM1;
            goto case TYcldouble;

        case TYcldouble:
            return mST01;

        // SIMD vector types
        case TYfloat4:
        case TYdouble2:
        case TYschar16:
        case TYuchar16:
        case TYshort8:
        case TYushort8:
        case TYlong4:
        case TYulong4:
        case TYllong2:
        case TYullong2:

        case TYfloat8:
        case TYdouble4:
        case TYschar32:
        case TYuchar32:
        case TYshort16:
        case TYushort16:
        case TYlong8:
        case TYulong8:
        case TYllong4:
        case TYullong4:
            if (!config.fpxmmregs)
            {   printf("SIMD operations not supported on this platform\n");
                exit(1);
            }
            return mXMM0;

        default:
            debug WRTYxx(tym);
            assert(0);
    }
}

/*******************************
 * setup register allocator parameters with platform specific data
 */
void cgreg_dst_regs(reg_t* dst_integer_reg, reg_t* dst_float_reg)
{
    *dst_integer_reg = AX;
    *dst_float_reg = XMM0;
}

/*******************************
 * Set register allocation priority order for type `ty`.
 * Params:
 *      ty = type being allocated
 *      pseq = set to NOREG-terminated preferred register order
 *      pseqmsw = set to preferred order for the most significant word
 *                of a register pair (only written when sz == REGSIZE * 2)
 */
void cgreg_set_priorities(tym_t ty, const(reg_t)** pseq, const(reg_t)** pseqmsw)
{
    const sz = tysize(ty);

    if (tyxmmreg(ty))
    {
        static immutable ubyte[9] sequence = [XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,NOREG];
        *pseq = sequence.ptr;
    }
    else if (I64)
    {
        if (sz == REGSIZE * 2)
        {
            static immutable ubyte[3] seqmsw1 = [CX,DX,NOREG];
            static immutable ubyte[5] seqlsw1 = [AX,BX,SI,DI,NOREG];
            *pseq = seqlsw1.ptr;
            *pseqmsw = seqmsw1.ptr;
        }
        else
        {   // R10 is reserved for the static link
            static immutable ubyte[15]
sequence2 = [AX,CX,DX,SI,DI,R8,R9,R11,BX,R12,R13,R14,R15,BP,NOREG];
            *pseq = cast(ubyte*)sequence2.ptr;
        }
    }
    else if (I32)
    {
        if (sz == REGSIZE * 2)
        {
            static immutable ubyte[5] seqlsw3 = [AX,BX,SI,DI,NOREG];
            static immutable ubyte[3] seqmsw3 = [CX,DX,NOREG];
            *pseq = seqlsw3.ptr;
            *pseqmsw = seqmsw3.ptr;
        }
        else
        {
            static immutable ubyte[8] sequence4 = [AX,CX,DX,BX,SI,DI,BP,NOREG];
            *pseq = sequence4.ptr;
        }
    }
    else
    {   assert(I16);
        if (typtr(ty))
        {
            // For pointer types, try to pick index register first
            static immutable ubyte[8] seqidx5 = [BX,SI,DI,AX,CX,DX,BP,NOREG];
            *pseq = seqidx5.ptr;
        }
        else
        {
            // Otherwise, try to pick index registers last
            static immutable ubyte[8] sequence6 = [AX,CX,DX,BX,SI,DI,BP,NOREG];
            *pseq = sequence6.ptr;
        }
    }
}

/*******************************************
 * Call finally block.
 * Params:
 *      bf = block to call
 *      retregs = registers to preserve across call
 * Returns:
 *      code generated
 */
private code *callFinallyBlock(block *bf, regm_t retregs)
{
    CodeBuilder cdbs; cdbs.ctor();
    CodeBuilder cdbr; cdbr.ctor();
    int nalign = 0;

    calledFinally = true;
    uint npush = gensaverestore(retregs,cdbs,cdbr);

    if (STACKALIGN >= 16)
    {   // Keep the stack aligned across the CALL (the return address
        // itself accounts for one REGSIZE push)
        npush += REGSIZE;
        if (npush & (STACKALIGN - 1))
        {   nalign = STACKALIGN - (npush & (STACKALIGN - 1));
            cod3_stackadj(cdbs, nalign);
        }
    }
    // CALL bf
    cdbs.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf);
    regcon.immed.mval = 0;      // call clobbers the immediate-value tracking
    if (nalign)
        cod3_stackadj(cdbs, -nalign);
    cdbs.append(cdbr);
    return cdbs.finish();
}

/*******************************
 * Generate block exit code
 */
void outblkexitcode(ref CodeBuilder cdb, block *bl, ref int anyspill, const(char)* sflsave, Symbol** retsym, const regm_t mfuncregsave)
{
    CodeBuilder cdb2; cdb2.ctor();
    elem *e = bl.Belem;
    block
*nextb; 844 regm_t retregs = 0; 845 846 if (bl.BC != BCasm) 847 assert(bl.Bcode == null); 848 849 switch (bl.BC) /* block exit condition */ 850 { 851 case BCiftrue: 852 { 853 bool jcond = true; 854 block *bs1 = bl.nthSucc(0); 855 block *bs2 = bl.nthSucc(1); 856 if (bs1 == bl.Bnext) 857 { // Swap bs1 and bs2 858 block *btmp; 859 860 jcond ^= 1; 861 btmp = bs1; 862 bs1 = bs2; 863 bs2 = btmp; 864 } 865 logexp(cdb,e,jcond,FLblock,cast(code *) bs1); 866 nextb = bs2; 867 } 868 L5: 869 if (configv.addlinenumbers && bl.Bsrcpos.Slinnum && 870 !(funcsym_p.ty() & mTYnaked)) 871 { 872 //printf("BCiftrue: %s(%u)\n", bl.Bsrcpos.Sfilename ? bl.Bsrcpos.Sfilename : "", bl.Bsrcpos.Slinnum); 873 cdb.genlinnum(bl.Bsrcpos); 874 } 875 if (nextb != bl.Bnext) 876 { 877 assert(!(bl.Bflags & BFLepilog)); 878 genjmp(cdb,JMP,FLblock,nextb); 879 } 880 break; 881 882 case BCjmptab: 883 case BCifthen: 884 case BCswitch: 885 { 886 assert(!(bl.Bflags & BFLepilog)); 887 doswitch(cdb,bl); // hide messy details 888 break; 889 } 890 version (MARS) 891 { 892 case BCjcatch: // D catch clause of try-catch 893 assert(ehmethod(funcsym_p) != EHmethod.EH_NONE); 894 // Mark all registers as destroyed. This will prevent 895 // register assignments to variables used in catch blocks. 896 getregs(cdb,lpadregs()); 897 898 if (config.ehmethod == EHmethod.EH_DWARF) 899 { 900 /* Each block must have ESP set to the same value it was at the end 901 * of the prolog. But the unwinder calls catch blocks with ESP set 902 * at the value it was when the throwing function was called, which 903 * may have arguments pushed on the stack. 904 * This instruction will reset ESP to the correct offset from EBP. 905 */ 906 cdb.gen1(ESCAPE | ESCfixesp); 907 } 908 goto case_goto; 909 } 910 version (SCPP) 911 { 912 case BCcatch: // C++ catch clause of try-catch 913 // Mark all registers as destroyed. This will prevent 914 // register assignments to variables used in catch blocks. 
915 getregs(cdb,allregs | mES); 916 goto case_goto; 917 918 case BCtry: 919 usednteh |= EHtry; 920 if (config.exe == EX_WIN32) 921 usednteh |= NTEHtry; 922 goto case_goto; 923 } 924 case BCgoto: 925 nextb = bl.nthSucc(0); 926 if ((MARS || 927 funcsym_p.Sfunc.Fflags3 & Fnteh) && 928 ehmethod(funcsym_p) != EHmethod.EH_DWARF && 929 bl.Btry != nextb.Btry && 930 nextb.BC != BC_finally) 931 { 932 regm_t retregsx = 0; 933 gencodelem(cdb,e,&retregsx,true); 934 int toindex = nextb.Btry ? nextb.Btry.Bscope_index : -1; 935 assert(bl.Btry); 936 int fromindex = bl.Btry.Bscope_index; 937 version (MARS) 938 { 939 if (toindex + 1 == fromindex) 940 { // Simply call __finally 941 if (bl.Btry && 942 bl.Btry.nthSucc(1).BC == BCjcatch) 943 { 944 goto L5; // it's a try-catch, not a try-finally 945 } 946 } 947 } 948 if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || 949 config.ehmethod == EHmethod.EH_SEH) 950 { 951 nteh_unwind(cdb,0,toindex); 952 } 953 else 954 { 955 version (MARS) 956 { 957 if (toindex + 1 <= fromindex) 958 { 959 //c = cat(c, linux_unwind(0, toindex)); 960 block *bt; 961 962 //printf("B%d: fromindex = %d, toindex = %d\n", bl.Bdfoidx, fromindex, toindex); 963 bt = bl; 964 while ((bt = bt.Btry) != null && bt.Bscope_index != toindex) 965 { block *bf; 966 967 //printf("\tbt.Bscope_index = %d, bt.Blast_index = %d\n", bt.Bscope_index, bt.Blast_index); 968 bf = bt.nthSucc(1); 969 // Only look at try-finally blocks 970 if (bf.BC == BCjcatch) 971 continue; 972 973 if (bf == nextb) 974 continue; 975 //printf("\tbf = B%d, nextb = B%d\n", bf.Bdfoidx, nextb.Bdfoidx); 976 if (nextb.BC == BCgoto && 977 !nextb.Belem && 978 bf == nextb.nthSucc(0)) 979 continue; 980 981 // call __finally 982 cdb.append(callFinallyBlock(bf.nthSucc(0), retregsx)); 983 } 984 } 985 } 986 } 987 goto L5; 988 } 989 case_goto: 990 { 991 regm_t retregsx = 0; 992 gencodelem(cdb,e,&retregsx,true); 993 if (anyspill) 994 { // Add in the epilog code 995 CodeBuilder cdbstore; 
cdbstore.ctor(); 996 CodeBuilder cdbload; cdbload.ctor(); 997 998 for (int i = 0; i < anyspill; i++) 999 { Symbol *s = globsym[i]; 1000 1001 if (s.Sflags & SFLspill && 1002 vec_testbit(dfoidx,s.Srange)) 1003 { 1004 s.Sfl = sflsave[i]; // undo block register assignments 1005 cgreg_spillreg_epilog(bl,s,cdbstore,cdbload); 1006 } 1007 } 1008 cdb.append(cdbstore); 1009 cdb.append(cdbload); 1010 } 1011 nextb = bl.nthSucc(0); 1012 goto L5; 1013 } 1014 1015 case BC_try: 1016 if (config.ehmethod == EHmethod.EH_NONE || funcsym_p.Sfunc.Fflags3 & Feh_none) 1017 { 1018 /* Need to use frame pointer to access locals, not the stack pointer, 1019 * because we'll be calling the BC_finally blocks and the stack will be off. 1020 */ 1021 needframe = 1; 1022 } 1023 else if (config.ehmethod == EHmethod.EH_SEH || config.ehmethod == EHmethod.EH_WIN32) 1024 { 1025 usednteh |= NTEH_try; 1026 nteh_usevars(); 1027 } 1028 else 1029 usednteh |= EHtry; 1030 goto case_goto; 1031 1032 case BC_finally: 1033 if (ehmethod(funcsym_p) == EHmethod.EH_DWARF) 1034 { 1035 // Mark scratch registers as destroyed. 1036 getregsNoSave(lpadregs()); 1037 1038 regm_t retregsx = 0; 1039 gencodelem(cdb,bl.Belem,&retregsx,true); 1040 1041 // JMP bl.nthSucc(1) 1042 nextb = bl.nthSucc(1); 1043 1044 goto L5; 1045 } 1046 else 1047 { 1048 if (config.ehmethod == EHmethod.EH_SEH || 1049 config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none)) 1050 { 1051 // Mark all registers as destroyed. This will prevent 1052 // register assignments to variables used in finally blocks. 1053 getregsNoSave(lpadregs()); 1054 } 1055 1056 assert(!e); 1057 // Generate CALL to finalizer code 1058 cdb.append(callFinallyBlock(bl.nthSucc(0), 0)); 1059 1060 // JMP bl.nthSucc(1) 1061 nextb = bl.nthSucc(1); 1062 1063 goto L5; 1064 } 1065 1066 case BC_lpad: 1067 { 1068 assert(ehmethod(funcsym_p) == EHmethod.EH_DWARF); 1069 // Mark all registers as destroyed. 
This will prevent 1070 // register assignments to variables used in finally blocks. 1071 getregsNoSave(lpadregs()); 1072 1073 regm_t retregsx = 0; 1074 gencodelem(cdb,bl.Belem,&retregsx,true); 1075 1076 // JMP bl.nthSucc(0) 1077 nextb = bl.nthSucc(0); 1078 goto L5; 1079 } 1080 1081 case BC_ret: 1082 { 1083 regm_t retregsx = 0; 1084 gencodelem(cdb,e,&retregsx,true); 1085 if (ehmethod(funcsym_p) == EHmethod.EH_DWARF) 1086 { 1087 } 1088 else 1089 cdb.gen1(0xC3); // RET 1090 break; 1091 } 1092 1093 static if (NTEXCEPTIONS) 1094 { 1095 case BC_except: 1096 { 1097 assert(!e); 1098 usednteh |= NTEH_except; 1099 nteh_setsp(cdb,0x8B); 1100 getregsNoSave(allregs); 1101 nextb = bl.nthSucc(0); 1102 goto L5; 1103 } 1104 case BC_filter: 1105 { 1106 nteh_filter(cdb, bl); 1107 // Mark all registers as destroyed. This will prevent 1108 // register assignments to variables used in filter blocks. 1109 getregsNoSave(allregs); 1110 regm_t retregsx = regmask(e.Ety, TYnfunc); 1111 gencodelem(cdb,e,&retregsx,true); 1112 cdb.gen1(0xC3); // RET 1113 break; 1114 } 1115 } 1116 1117 case BCretexp: 1118 reg_t reg1, reg2, lreg, mreg; 1119 reg1 = reg2 = NOREG; 1120 if (config.exe == EX_WIN64) // broken 1121 retregs = regmask(e.Ety, funcsym_p.ty()); 1122 else 1123 { 1124 retregs = allocretregs(e.Ety, e.ET, funcsym_p.ty(), ®1, ®2); 1125 assert(reg1 != NOREG || !retregs); 1126 } 1127 1128 lreg = mreg = NOREG; 1129 if (reg1 == NOREG) 1130 {} 1131 else if (tybasic(e.Ety) == TYcfloat) 1132 lreg = ST01; 1133 else if (mask(reg1) & (mST0 | mST01)) 1134 lreg = reg1; 1135 else if (reg2 == NOREG) 1136 lreg = reg1; 1137 else if (mask(reg1) & XMMREGS) 1138 { 1139 lreg = XMM0; 1140 mreg = XMM1; 1141 } 1142 else 1143 { 1144 lreg = mask(reg1) & mLSW ? reg1 : AX; 1145 mreg = mask(reg2) & mMSW ? 
reg2 : DX; 1146 } 1147 if (reg1 != NOREG) 1148 retregs = (mask(lreg) | mask(mreg)) & ~mask(NOREG); 1149 1150 // For the final load into the return regs, don't set regcon.used, 1151 // so that the optimizer can potentially use retregs for register 1152 // variable assignments. 1153 1154 if (config.flags4 & CFG4optimized) 1155 { regm_t usedsave; 1156 1157 docommas(cdb,&e); 1158 usedsave = regcon.used; 1159 if (!OTleaf(e.Eoper)) 1160 gencodelem(cdb,e,&retregs,true); 1161 else 1162 { 1163 if (e.Eoper == OPconst) 1164 regcon.mvar = 0; 1165 gencodelem(cdb,e,&retregs,true); 1166 regcon.used = usedsave; 1167 if (e.Eoper == OPvar) 1168 { Symbol *s = e.EV.Vsym; 1169 1170 if (s.Sfl == FLreg && s.Sregm != mAX) 1171 *retsym = s; 1172 } 1173 } 1174 } 1175 else 1176 { 1177 gencodelem(cdb,e,&retregs,true); 1178 } 1179 1180 if (reg1 == NOREG) 1181 { 1182 } 1183 else if ((mask(reg1) | mask(reg2)) & (mST0 | mST01)) 1184 { 1185 assert(reg1 == lreg && reg2 == NOREG); 1186 } 1187 // fix return registers 1188 else if (tybasic(e.Ety) == TYcfloat) 1189 { 1190 assert(lreg == ST01); 1191 if (I64) 1192 { 1193 assert(reg2 == NOREG); 1194 // spill 1195 pop87(); 1196 pop87(); 1197 cdb.genfltreg(0xD9, 3, tysize(TYfloat)); 1198 genfwait(cdb); 1199 cdb.genfltreg(0xD9, 3, 0); 1200 genfwait(cdb); 1201 // reload 1202 if (config.exe == EX_WIN64) 1203 { 1204 assert(reg1 == AX); 1205 cdb.genfltreg(LOD, reg1, 0); 1206 code_orrex(cdb.last(), REX_W); 1207 } 1208 else 1209 { 1210 assert(reg1 == XMM0); 1211 cdb.genxmmreg(xmmload(TYdouble), reg1, 0, TYdouble); 1212 } 1213 } 1214 else 1215 { 1216 assert(reg1 == AX && reg2 == DX); 1217 regm_t pretregs = mask(reg1) | mask(reg2); 1218 fixresult_complex87(cdb, e, retregs, &pretregs); 1219 } 1220 } 1221 else if (reg2 == NOREG) 1222 assert(lreg == reg1); 1223 else for (int v = 0; v < 2; v++) 1224 { 1225 if (v ^ (reg1 != mreg)) 1226 genmovreg(cdb, reg1, lreg); 1227 else 1228 genmovreg(cdb, reg2, mreg); 1229 } 1230 if (reg1 != NOREG) 1231 retregs = (mask(reg1) | 
mask(reg2)) & ~mask(NOREG); 1232 goto L4; 1233 1234 case BCret: 1235 case BCexit: 1236 retregs = 0; 1237 gencodelem(cdb,e,&retregs,true); 1238 L4: 1239 if (retregs == mST0) 1240 { assert(global87.stackused == 1); 1241 pop87(); // account for return value 1242 } 1243 else if (retregs == mST01) 1244 { assert(global87.stackused == 2); 1245 pop87(); 1246 pop87(); // account for return value 1247 } 1248 1249 if (bl.BC == BCexit) 1250 { 1251 if (config.flags4 & CFG4optimized) 1252 mfuncreg = mfuncregsave; 1253 } 1254 else if (MARS || usednteh & NTEH_try) 1255 { 1256 block *bt = bl; 1257 while ((bt = bt.Btry) != null) 1258 { 1259 block *bf = bt.nthSucc(1); 1260 version (MARS) 1261 { 1262 // Only look at try-finally blocks 1263 if (bf.BC == BCjcatch) 1264 { 1265 continue; 1266 } 1267 } 1268 if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || 1269 config.ehmethod == EHmethod.EH_SEH) 1270 { 1271 if (bt.Bscope_index == 0) 1272 { 1273 // call __finally 1274 CodeBuilder cdbs; cdbs.ctor(); 1275 CodeBuilder cdbr; cdbr.ctor(); 1276 1277 nteh_gensindex(cdb,-1); 1278 gensaverestore(retregs,cdbs,cdbr); 1279 cdb.append(cdbs); 1280 cdb.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf.nthSucc(0)); 1281 regcon.immed.mval = 0; 1282 cdb.append(cdbr); 1283 } 1284 else 1285 { 1286 nteh_unwind(cdb,retregs,~0); 1287 } 1288 break; 1289 } 1290 else 1291 { 1292 // call __finally 1293 cdb.append(callFinallyBlock(bf.nthSucc(0), retregs)); 1294 } 1295 } 1296 } 1297 break; 1298 1299 case BCasm: 1300 { 1301 assert(!e); 1302 // Mark destroyed registers 1303 CodeBuilder cdbx; cdbx.ctor(); 1304 getregs(cdbx,iasm_regs(bl)); // mark destroyed registers 1305 code *c = cdbx.finish(); 1306 if (bl.Bsucc) 1307 { nextb = bl.nthSucc(0); 1308 if (!bl.Bnext) 1309 { 1310 cdb.append(bl.Bcode); 1311 cdb.append(c); 1312 goto L5; 1313 } 1314 if (nextb != bl.Bnext && 1315 bl.Bnext && 1316 !(bl.Bnext.BC == BCgoto && 1317 !bl.Bnext.Belem && 1318 nextb == bl.Bnext.nthSucc(0))) 1319 { 1320 // See 
if already have JMP at end of block
                code *cl = code_last(bl.Bcode);
                if (!cl || cl.Iop != JMP)
                {
                    cdb.append(bl.Bcode);
                    cdb.append(c);
                    goto L5;                    // add JMP at end of block
                }
            }
        }
        cdb.append(bl.Bcode);
        break;
    }

    default:
        debug
        printf("bl.BC = %d\n",bl.BC);
        assert(0);
    }
}

/***************************
 * Allocate registers for function return values.
 *
 * Params:
 *      ty = return type
 *      t = return type extended info
 *      tyf = function type
 *      reg1 = output for the first part register
 *      reg2 = output for the second part register
 *
 * Returns:
 *      a bit mask of return registers.
 *      0 if function returns on the stack or returns void.
 */
regm_t allocretregs(tym_t ty, type *t, tym_t tyf, reg_t *reg1, reg_t *reg2)
{
    tym_t ty1 = ty;             // type of first part of the return value
    tym_t ty2 = TYMAX;          // type of second part; TYMAX means "no second part"

    *reg1 = *reg2 = NOREG;

    if (tybasic(ty) == TYvoid)
        return 0;

    // mTYxmmgpr / mTYgprxmm modifiers force a split between an XMM and a GP register
    if (ty & mTYxmmgpr)
    {
        ty1 = TYdouble;
        ty2 = TYllong;
    }
    else if (ty & mTYgprxmm)
    {
        ty1 = TYllong;
        ty2 = TYdouble;
    }

    if (tybasic(ty) == TYstruct)
    {
        assert(t);
        ty1 = t.Tty;
    }

    switch (tyrelax(ty1))
    {
        case TYcent:
            if (!I64 || config.exe == EX_WIN64)
                return 0;       // returned on the stack
            ty1 = ty2 = TYllong;
            break;

        case TYcdouble:
            if (tybasic(tyf) == TYjfunc && I32)
                break;          // 32-bit jfunc returns complex on the x87 stack
            if (!I64 || config.exe == EX_WIN64)
                return 0;
            ty1 = ty2 = TYdouble;
            break;

        case TYcfloat:
            if (tybasic(tyf) == TYjfunc && I32)
                break;
            if (!I64)
                goto case TYllong;      // complex float packs into one 64-bit value
            if (config.exe == EX_WIN64)
                ty1 = TYllong;
            else
                ty1 = TYdouble;
            break;

        case TYcldouble:
            if (tybasic(tyf) == TYjfunc && I32)
                break;
            if (!I64 || config.exe == EX_WIN64)
                return 0;
            break;

        case TYllong:
            if (!I64)
                ty1 = ty2 = TYlong;     // pair of 32-bit registers on 32-bit targets
            break;

        case TYarray:
            type* targ1, targ2;
            argtypes(t, targ1, targ2);
            if (targ1)
                ty1 = targ1.Tty;
            else
                return 0;
            if (targ2)
                ty2 = targ2.Tty;
            break;

        case TYstruct:
            assert(t);
            if (I64 && config.exe != EX_WIN64)
            {
                // Posix x86-64: return types were precomputed into Sarg1type/Sarg2type
                assert(tybasic(t.Tty) == TYstruct);
                type *targ1 = t.Ttag.Sstruct.Sarg1type;
                type *targ2 = t.Ttag.Sstruct.Sarg2type;
                if (targ1)
                    ty1 = targ1.Tty;
                else
                    return 0;
                if (targ2)
                    ty2 = targ2.Tty;
                break;
            }
            else if (!(t.Ttag.Sstruct.Sflags & STRnotpod))
            {
                // windows only, return POD of 1, 2, 4, or 8 bytes on EAX(:EDX)
                if (!(config.exe & (EX_WIN64 | EX_WIN32)))
                    return 0;

                uint sz = cast(uint) type_size(t);

                if (sz > 8 || sz == 0)
                    return 0;

                if (sz == 8)
                {
                    if (config.exe == EX_WIN64)
                        ty1 = TYllong;
                    else
                        ty1 = ty2 = TYlong;     // EAX:EDX pair
                }
                else if (sz == 4 || sz == 2 || sz == 1)
                    ty1 = TYlong;
                else
                    return 0;   // odd sizes (3,5,6,7) go on the stack

                break;
            }
            return 0;

        default:
            break;
    }


    // Hands out AX,DX and XMM0,XMM1 in order as return-value parts are assigned
    static struct RetRegsAllocator
    {
    nothrow:
        static reg_t[2] gp_regs = [AX, DX];
        static reg_t[2] xmm_regs = [XMM0, XMM1];

        uint cntgpr = 0,
             cntxmm = 0;

        reg_t gpr() { return gp_regs[cntgpr++]; }
        reg_t xmm() { return xmm_regs[cntxmm++]; }
    }

    tym_t tym = ty1;
    reg_t *reg = reg1;
    RetRegsAllocator rralloc;
    for (int v = 0; v < 2; ++v)         // once for each part of the return value
    {
        if (tym == TYMAX) continue;     // no second part
        switch (tysize(tym))
        {
            case 1:
            case 2:
            case 4:
                if (tyfloating(tym))
                {
                    if (I64)
                        *reg = rralloc.xmm();
                    else
                        *reg = ST0;     // 32-bit: floats return on the x87 stack top
                }
                else
                    *reg = rralloc.gpr();
                break;

            case 8:
                if (tycomplex(tym))
                {
                    assert(tybasic(tyf) == TYjfunc && I32);
                    *reg = ST01;        // complex returns in ST1:ST0
                    break;
                }
                assert(I64 || tyfloating(tym));
                goto case 4;            // handled the same as the smaller sizes

            default:
                // Larger / odd sizes: x87 and vector return values
                if (tybasic(tym) == TYldouble || tybasic(tym) == TYildouble)
                {
                    *reg = ST0;         // 80-bit reals return on the x87 stack top
                    break;
                }
                else if (tybasic(tym) == TYcldouble)
                {
                    *reg = ST01;        // complex 80-bit real returns in ST1:ST0
                    break;
                }
                else if (tycomplex(tym) && tybasic(tyf) == TYjfunc && I32)
                {
                    *reg = ST01;        // 32-bit jfunc returns complex values in ST1:ST0
                    break;
                }
                else if (tysimd(tym))
                {
                    *reg = rralloc.xmm();       // SIMD types come back in XMM registers
                    break;
                }

                debug WRTYxx(tym);
                assert(0);
        }
        tym = ty2;      // move on to the second part, if any
        reg = reg2;
    }
    return (mask(*reg1) | mask(*reg2)) & ~mask(NOREG);
}

/***********************************************
 * Struct necessary for sorting switch cases.
 */

alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*);
extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar);

extern (C) // qsort cmp functions need to be "C"
{
    struct CaseVal
    {
        targ_ullong val;        // the case value
        block *target;          // block to jump to when the switch value matches val

        /* Sort function for qsort() */
        extern (C) static nothrow int cmp(scope const(void*) p, scope const(void*) q)
        {
            const(CaseVal)* c1 = cast(const(CaseVal)*)p;
            const(CaseVal)* c2 = cast(const(CaseVal)*)q;
            return (c1.val < c2.val) ? -1 : ((c1.val == c2.val) ? 0 : 1);
        }
    }
}

/***
 * Generate comparison of [reg2,reg] with val
 * Params:
 *      cdb = append generated code here
 *      val = case value to compare against
 *      sz = size of the switch value in bytes
 *      reg = register holding (the low word of) the switch value
 *      reg2 = register holding the high word, or NOREG if none
 *      sreg = scratch register for 64-bit constants that do not fit a
 *             sign-extended 32-bit immediate; NOREG if none was allocated
 */
private void cmpval(ref CodeBuilder cdb, targ_llong val, uint sz, reg_t reg, reg_t reg2, reg_t sreg)
{
    if (I64 && sz == 8)
    {
        assert(reg2 == NOREG);
        if (val == cast(int)val)    // if val is a 64 bit value sign-extended from 32 bits
        {
            cdb.genc2(0x81,modregrmx(3,7,reg),cast(targ_size_t)val);    // CMP reg,value32
            cdb.last().Irex |= REX_W;                                   // 64 bit operand
        }
        else
        {
            assert(sreg != NOREG);
            movregconst(cdb,sreg,cast(targ_size_t)val,64);  // MOV sreg,val64
            genregs(cdb,0x3B,reg,sreg);                     // CMP reg,sreg
            code_orrex(cdb.last(), REX_W);
            getregsNoSave(mask(sreg));                      // don't remember we loaded this constant
        }
    }
    else if (reg2 == NOREG)
        cdb.genc2(0x81,modregrmx(3,7,reg),cast(targ_size_t)val);        // CMP reg,casevalue
    else
    {
        cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val)); // CMP reg2,MSREG(casevalue)
        code *cnext = gennop(null);
        genjmp(cdb,JNE,FLcode,cast(block *) cnext);                     // JNE cnext
        cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)val);         // CMP reg,casevalue
        cdb.append(cnext);
    }
}

/**********************
 * Emit compares over the sorted cases casevals[0 .. ncases], using a
 * binary search when it is worth it, else a linear CMP/JE sequence.
 * Params:
 *      bdefault = block to jump to when no case matches
 *      last = true if a final JMP to the default block is needed
 */
private void ifthen(ref CodeBuilder cdb, CaseVal *casevals, size_t ncases,
        uint sz, reg_t reg, reg_t reg2, reg_t sreg, block *bdefault, bool last)
{
    if (ncases >= 4 && config.flags4 & CFG4speed)
    {
        size_t pivot = ncases >> 1;

        // Compares for casevals[0..pivot]
        CodeBuilder cdb1; cdb1.ctor();
        ifthen(cdb1, casevals, pivot, sz, reg, reg2, sreg, bdefault, true);

        // Compares for casevals[pivot+1..ncases]
        CodeBuilder cdb2; cdb2.ctor();
        ifthen(cdb2, casevals + pivot + 1, ncases - pivot - 1, sz, reg, reg2, sreg, bdefault, last);
        code *c2 = gennop(null);

        // Compare for caseval[pivot]
        cmpval(cdb, casevals[pivot].val, sz, reg, reg2, sreg);
        genjmp(cdb,JE,FLblock,casevals[pivot].target);  // JE target
        // Note uint jump here, as cases were sorted using uint comparisons
        genjmp(cdb,JA,FLcode,cast(block *) c2);         // JA c2

        cdb.append(cdb1);
        cdb.append(c2);
        cdb.append(cdb2);
    }
    else
    {   // Not worth doing a binary search, just do a sequence of CMP/JE
        for (size_t n = 0; n < ncases; n++)
        {
            targ_llong val = casevals[n].val;
            cmpval(cdb, val, sz, reg, reg2, sreg);
            code *cnext = null;
            if (reg2 != NOREG)
            {
                cnext = gennop(null);
                genjmp(cdb,JNE,FLcode,cast(block *) cnext);     // JNE cnext
                cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val)); // CMP reg2,MSREG(casevalue)
            }
            genjmp(cdb,JE,FLblock,casevals[n].target);  // JE caseaddr
            cdb.append(cnext);
        }

        if (last)                                       // if default is not next block
            genjmp(cdb,JMP,FLblock,bdefault);
    }
}

/*******************************
 * Generate code for blocks ending in a switch statement.
 * Take BCswitch and decide on
 *      BCifthen        use if - then code
 *      BCjmptab        index into jump table
 *      BCswitch        search table for match
 */

void doswitch(ref CodeBuilder cdb, block *b)
{
    targ_ulong msw;

    // If switch tables are in code segment and we need a CS: override to get at them
    bool csseg = cast(bool)(config.flags & CFGromable);

    //printf("doswitch(%d)\n", b.BC);
    elem *e = b.Belem;
    elem_debug(e);
    docommas(cdb,&e);
    cgstate.stackclean++;
    tym_t tys = tybasic(e.Ety);
    int sz = _tysize[tys];
    bool dword = (sz == 2 * REGSIZE);   // switch value occupies a register pair
    bool mswsame = true;                // assume all msw's are the same
    targ_llong *p = b.Bswitch;          // pointer to case data
    assert(p);
    uint ncases = cast(uint)*p++;       // number of cases

    targ_llong vmax = MINLL;            // running max (init to smallest possible llong)
    targ_llong vmin = MAXLL;            // running min (init to largest possible llong)
    for (uint n = 0; n < ncases; n++)   // find max and min case values
    {
        targ_llong val = *p++;
        if (val > vmax) vmax = val;
        if (val < vmin) vmin = val;
        if (REGSIZE == 2)
        {
            ushort ms = (val >> 16) & 0xFFFF;
            if (n == 0)
                msw = ms;
            else if (msw != ms)
                mswsame = 0;
        }
        else // REGSIZE == 4
        {
            targ_ulong ms = (val >> 32) & 0xFFFFFFFF;
            if (n == 0)
                msw = ms;
            else if (msw != ms)
                mswsame = 0;
        }
    }
    p -= ncases;        // rewind to the first case value
    //dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin);

    /* Three kinds of switch strategies - pick one
     */
    if (ncases <= 3)
        goto Lifthen;
    else if (I16 && cast(targ_ullong)(vmax - vmin) <= ncases * 2)
        goto Ljmptab;           // >=50% of the table is case values, rest is default
    else if (cast(targ_ullong)(vmax - vmin) <= ncases * 3)
        goto Ljmptab;           // >= 33% of the table is case values, rest is default
    else if (I16)
        goto Lswitch;
    else
        goto Lifthen;

    /*************************************************************************/
    {   // generate if-then sequence
    Lifthen:
        regm_t retregs = ALLREGS;
        b.BC = BCifthen;
        scodelem(cdb,e,&retregs,0,true);
        reg_t reg, reg2;
        if (dword)
        {   reg = findreglsw(retregs);
            reg2 = findregmsw(retregs);
        }
        else
        {
            reg = findreg(retregs);     // reg that result is in
            reg2 = NOREG;
        }
        list_t bl = b.Bsucc;
        block *bdefault = b.nthSucc(0);
        if (dword && mswsame)
        {
            // All cases share the same high word; compare it once up front
            cdb.genc2(0x81,modregrm(3,7,reg2),msw);     // CMP reg2,MSW
            genjmp(cdb,JNE,FLblock,bdefault);           // JNE default
            reg2 = NOREG;
        }

        reg_t sreg = NOREG;             // may need a scratch register

        // Put into casevals[0..ncases] so we can sort then slice
        CaseVal *casevals = cast(CaseVal *)malloc(ncases * CaseVal.sizeof);
        assert(casevals);
        for (uint n = 0; n < ncases; n++)
        {
            casevals[n].val = p[n];
            bl = list_next(bl);
            casevals[n].target = list_block(bl);

            // See if we need a scratch register
            if (sreg == NOREG && I64 && sz == 8 && p[n] != cast(int)p[n])
            {   regm_t regm = ALLREGS & ~mask(reg);
                allocreg(cdb,&regm, &sreg, TYint);
            }
        }

        // Sort cases so we can do a runtime binary search
        qsort(casevals, ncases, CaseVal.sizeof, &CaseVal.cmp);

        //for (uint n = 0; n < ncases; n++)
            //printf("casevals[%lld] = x%x\n", n, casevals[n].val);

        // Generate binary tree of comparisons
        ifthen(cdb, casevals, ncases, sz, reg, reg2, sreg, bdefault, bdefault != b.Bnext);

        free(casevals);

        cgstate.stackclean--;
        return;
    }

    /*************************************************************************/
    {
        // Use switch value to index into jump table
    Ljmptab:
        //printf("Ljmptab:\n");

        b.BC = BCjmptab;

        /* If vmin is small enough, we can just set it to 0 and the jump
         * table entries from 0..vmin-1 can be set with the default target.
         * This saves the SUB instruction.
         * Must be same computation as used in outjmptab().
         */
        if (vmin > 0 && vmin <= _tysize[TYint])
            vmin = 0;

        b.Btablesize = cast(int) (vmax - vmin + 1) * tysize(TYnptr);
        regm_t retregs = IDXREGS;
        if (dword)
            retregs |= mMSW;
        static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
        {
            if (I32 && config.flags3 & CFG3pic)
                retregs &= ~mBX;                        // need EBX for GOT
        }
        bool modify = (I16 || I64 || vmin);             // will we write to the index reg?
        scodelem(cdb,e,&retregs,0,!modify);
        reg_t reg = findreg(retregs & IDXREGS);         // reg that result is in
        reg_t reg2;
        if (dword)
            reg2 = findregmsw(retregs);
        if (modify)
        {
            assert(!(retregs & regcon.mvar));
            getregs(cdb,retregs);
        }
        if (vmin)                       // if there is a minimum
        {
            cdb.genc2(0x81,modregrm(3,5,reg),cast(targ_size_t)vmin);    // SUB reg,vmin
            if (dword)
            {   cdb.genc2(0x81,modregrm(3,3,reg2),cast(targ_size_t)MSREG(vmin));  // SBB reg2,vmin
                genjmp(cdb,JNE,FLblock,b.nthSucc(0));   // JNE default
            }
        }
        else if (dword)
        {   gentstreg(cdb,reg2);                        // TEST reg2,reg2
            genjmp(cdb,JNE,FLblock,b.nthSucc(0));       // JNE default
        }
        if (vmax - vmin != REGMASK)     // if there is a maximum
        {   // CMP reg,vmax-vmin
            cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)(vmax-vmin));
            if (I64 && sz == 8)
                code_orrex(cdb.last(), REX_W);
            genjmp(cdb,JA,FLblock,b.nthSucc(0));        // JA default
        }
        if (I64)
        {
            if (!vmin)
            {   // Need to clear out high 32 bits of reg
                // Use 8B instead of 89, as 89 will be optimized away as a NOP
                genregs(cdb,0x8B,reg,reg);              // MOV reg,reg
            }
            if (config.flags3 & CFG3pic || config.exe == EX_WIN64)
            {
                /* LEA    R1,disp[RIP]          48 8D 05 00 00 00 00
                 * MOVSXD R2,[reg*4][R1]        48 63 14 B8
                 * LEA    R1,[R1][R2]           48 8D 04 02
                 * JMP    R1                    FF E0
                 */
                reg_t r1;
                regm_t scratchm = ALLREGS & ~mask(reg);
                allocreg(cdb,&scratchm,&r1,TYint);
                reg_t r2;
                scratchm = ALLREGS & ~(mask(reg) | mask(r1));
                allocreg(cdb,&scratchm,&r2,TYint);

                CodeBuilder cdbe; cdbe.ctor();
                cdbe.genc1(LEA,(REX_W << 16) | modregxrm(0,r1,5),FLswitch,0);   // LEA R1,disp[RIP]
                cdbe.last().IEV1.Vswitch = b;
                cdbe.gen2sib(0x63,(REX_W << 16) | modregxrm(0,r2,4), modregxrmx(2,reg,r1)); // MOVSXD R2,[reg*4][R1]
                cdbe.gen2sib(LEA,(REX_W << 16) | modregxrm(0,r1,4),modregxrmx(0,r1,r2));    // LEA R1,[R1][R2]
                cdbe.gen2(0xFF,modregrmx(3,4,r1));                                          // JMP R1

                b.Btablesize = cast(int) (vmax - vmin + 1) * 4;     // 4-byte relative entries
                code *ce = cdbe.finish();
                pinholeopt(ce, null);

                cdb.append(cdbe);
            }
            else
            {
                cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);         // JMP disp[reg*8]
                cdb.last().IEV1.Vswitch = b;
                cdb.last().Isib = modregrm(3,reg & 7,5);
                if (reg & 8)
                    cdb.last().Irex |= REX_X;
            }
        }
        else if (I32)
        {
            static if (JMPJMPTABLE)
            {
                /* LEA jreg,offset ctable[reg][reg * 4]
                   JMP jreg
                  ctable:
                   JMP case0
                   JMP case1
                   ...
                 */
                CodeBuilder ctable; ctable.ctor();
                block *bdef = b.nthSucc(0);
                targ_llong u;
                for (u = vmin; ; u++)
                {   block *targ = bdef;
                    for (n = 0; n < ncases; n++)
                    {
                        if (p[n] == u)
                        {   targ = b.nthSucc(n + 1);
                            break;
                        }
                    }
                    genjmp(ctable,JMP,FLblock,targ);
                    ctable.last().Iflags |= CFjmp5;     // don't shrink these
                    if (u == vmax)
                        break;
                }

                // Allocate scratch register jreg
                regm_t scratchm = ALLREGS & ~mask(reg);
                uint jreg = AX;
                allocreg(cdb,&scratchm,&jreg,TYint);

                // LEA jreg, offset ctable[reg][reg*4]
                cdb.genc1(LEA,modregrm(2,jreg,4),FLcode,6);
                cdb.last().Isib = modregrm(2,reg,reg);
                cdb.gen2(0xFF,modregrm(3,4,jreg));      // JMP jreg
                cdb.append(ctable);
                b.Btablesize = 0;                       // table was emitted inline
                cgstate.stackclean--;
                return;
            }
            else static if (TARGET_OSX)
            {
                /*     CALL L1
                 * L1: POP  R1
                 *     ADD  R1,disp[reg*4][R1]
                 *     JMP  R1
                 */
                // Allocate scratch register r1
                regm_t scratchm = ALLREGS & ~mask(reg);
                reg_t r1;
                allocreg(cdb,&scratchm,&r1,TYint);

                cdb.genc2(CALL,0,0);                            //     CALL L1
                cdb.gen1(0x58 + r1);                            // L1: POP R1
                cdb.genc1(0x03,modregrm(2,r1,4),FLswitch,0);    // ADD R1,disp[reg*4][EBX]
                cdb.last().IEV1.Vswitch = b;
                cdb.last().Isib = modregrm(2,reg,r1);
                cdb.gen2(0xFF,modregrm(3,4,r1));                // JMP R1
            }
            else
            {
                if (config.flags3 & CFG3pic)
                {
                    /* MOV  R1,EBX
                     * SUB  R1,funcsym_p@GOTOFF[offset][reg*4][EBX]
                     * JMP  R1
                     */

                    // Load GOT in EBX
                    load_localgot(cdb);

                    // Allocate scratch register r1
                    regm_t scratchm = ALLREGS & ~(mask(reg) | mBX);
                    reg_t r1;
                    allocreg(cdb,&scratchm,&r1,TYint);

                    genmovreg(cdb,r1,BX);                           // MOV R1,EBX
                    cdb.genc1(0x2B,modregxrm(2,r1,4),FLswitch,0);   // SUB R1,disp[reg*4][EBX]
                    cdb.last().IEV1.Vswitch = b;
                    cdb.last().Isib = modregrm(2,reg,BX);
                    cdb.gen2(0xFF,modregrmx(3,4,r1));               // JMP R1
                }
                else
                {
                    cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0);     // JMP disp[idxreg*4]
                    cdb.last().IEV1.Vswitch = b;
                    cdb.last().Isib = modregrm(2,reg,5);
                }
            }
        }
        else if (I16)
        {
            cdb.gen2(0xD1,modregrm(3,4,reg));                   // SHL reg,1
            uint rm = getaddrmode(retregs) | modregrm(0,4,0);
            cdb.genc1(0xFF,rm,FLswitch,0);                      // JMP [CS:]disp[idxreg]
            cdb.last().IEV1.Vswitch = b;
            cdb.last().Iflags |= csseg ? CFcs : 0;              // segment override
        }
        else
            assert(0);
        cgstate.stackclean--;
        return;
    }

    /*************************************************************************/
    {
        /* Scan a table of case values, and jump to corresponding address.
         * Since it relies on REPNE SCASW, it has really nothing to recommend it
         * over Lifthen for 32 and 64 bit code.
         * Note that it has not been tested with MACHOBJ (OSX).
         */
    Lswitch:
        regm_t retregs = mAX;                   // SCASW requires AX
        if (dword)
            retregs |= mDX;
        else if (ncases <= 6 || config.flags4 & CFG4speed)
            goto Lifthen;
        scodelem(cdb,e,&retregs,0,true);
        if (dword && mswsame)
        {   /* CMP DX,MSW       */
            cdb.genc2(0x81,modregrm(3,7,DX),msw);
            genjmp(cdb,JNE,FLblock,b.nthSucc(0));   // JNE default
        }
        getregs(cdb,mCX|mDI);
        static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
        {
            if (config.flags3 & CFG3pic)
            {   // Add in GOT
                getregs(cdb,mDX);
                cdb.genc2(CALL,0,0);        //     CALL L1
                cdb.gen1(0x58 + DI);        // L1: POP EDI

                // ADD EDI,_GLOBAL_OFFSET_TABLE_+3
                Symbol *gotsym = Obj.getGOTsym();
                cdb.gencs(0x81,modregrm(3,0,DI),FLextern,gotsym);
                cdb.last().Iflags = CFoff;
                cdb.last().IEV2.Voffset = 3;

                makeitextern(gotsym);

                genmovreg(cdb, DX, DI);     // MOV EDX, EDI
                // ADD EDI,offset of switch table
                cdb.gencs(0x81,modregrm(3,0,DI),FLswitch,null);
                cdb.last().IEV2.Vswitch = b;
            }
        }
        if (!(config.flags3 & CFG3pic))
        {
            // MOV DI,offset of switch table
            cdb.gencs(0xC7,modregrm(3,0,DI),FLswitch,null);
            cdb.last().IEV2.Vswitch = b;
        }
        movregconst(cdb,CX,ncases,0);       // MOV CX,ncases

        /* The switch table will be accessed through ES:DI.
         * Therefore, load ES with proper segment value.
         */
        if (config.flags3 & CFG3eseqds)
        {
            assert(!csseg);
            getregs(cdb,mCX);               // allocate CX
        }
        else
        {
            getregs(cdb,mES|mCX);           // allocate ES and CX
            cdb.gen1(csseg ? 0x0E : 0x1E);  // PUSH CS/DS
            cdb.gen1(0x07);                 // POP  ES
        }

        targ_size_t disp = (ncases - 1) * _tysize[TYint];   // displacement to jump table
        if (dword && !mswsame)
        {

            /* Build the following:
                L1:     SCASW
                        JNE     L2
                        CMP     DX,[CS:]disp[DI]
                L2:     LOOPNE  L1
             */

            const int mod = (disp > 127) ? 2 : 1;               // displacement size
            code *cloop = genc2(null,0xE0,0,-7 - mod - csseg);  // LOOPNE scasw
            cdb.gen1(0xAF);                                     // SCASW
            code_orflag(cdb.last(),CFtarg2);                    // target of jump
            genjmp(cdb,JNE,FLcode,cast(block *) cloop);         // JNE loop
            // CMP DX,[CS:]disp[DI]
            cdb.genc1(0x39,modregrm(mod,DX,5),FLconst,disp);
            cdb.last().Iflags |= csseg ? CFcs : 0;              // possible seg override
            cdb.append(cloop);
            disp += ncases * _tysize[TYint];                    // skip over msw table
        }
        else
        {
            cdb.gen1(0xF2);             // REPNE
            cdb.gen1(0xAF);             // SCASW
        }
        genjmp(cdb,JNE,FLblock,b.nthSucc(0));   // JNE default
        const int mod = (disp > 127) ? 2 : 1;   // 1 or 2 byte displacement
        if (csseg)
            cdb.gen1(SEGCS);            // table is in code segment
        static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
        {
            if (config.flags3 & CFG3pic)
            {   // ADD EDX,(ncases-1)*2[EDI]
                cdb.genc1(0x03,modregrm(mod,DX,7),FLconst,disp);
                // JMP EDX
                cdb.gen2(0xFF,modregrm(3,4,DX));
            }
        }
        if (!(config.flags3 & CFG3pic))
        {   // JMP (ncases-1)*2[DI]
            cdb.genc1(0xFF,modregrm(mod,4,(I32 ? 7 : 5)),FLconst,disp);
            cdb.last().Iflags |= csseg ? CFcs : 0;
        }
        b.Btablesize = disp + _tysize[TYint] + ncases * tysize(TYnptr);
        //assert(b.Bcode);
        cgstate.stackclean--;
        return;
    }
}

/******************************
 * Output data block for a jump table (BCjmptab).
 * The 'holes' in the table get filled with the
 * default label.
 */

void outjmptab(block *b)
{
    if (JMPJMPTABLE && I32)
        return;                         // table was emitted inline by doswitch()

    targ_llong *p = b.Bswitch;          // pointer to case data
    size_t ncases = cast(size_t)*p++;   // number of cases

    /* Find vmin and vmax, the range of the table will be [vmin .. vmax + 1]
     * Must be same computation as used in doswitch().
 */
    targ_llong vmax = MINLL;            // running max (init to smallest possible llong)
    targ_llong vmin = MAXLL;            // running min (init to largest possible llong)
    for (size_t n = 0; n < ncases; n++) // find min and max case values
    {   targ_llong val = p[n];
        if (val > vmax) vmax = val;
        if (val < vmin) vmin = val;
    }
    if (vmin > 0 && vmin <= _tysize[TYint])
        vmin = 0;                       // entries [0..vmin-1] get the default target
    assert(vmin <= vmax);

    /* Segment and offset into which the jump table will be emitted
     */
    int jmpseg = objmod.jmpTableSegment(funcsym_p);
    targ_size_t *poffset = &Offset(jmpseg);

    /* Align start of jump table
     */
    targ_size_t alignbytes = _align(0,*poffset) - *poffset;
    objmod.lidata(jmpseg,*poffset,alignbytes);
    assert(*poffset == b.Btableoffset);         // should match precomputed value

    Symbol *gotsym = null;
    targ_size_t def = b.nthSucc(0).Boffset;     // default address
    for (targ_llong u = vmin; ; u++)
    {   targ_size_t targ = def;                 // default
        for (size_t n = 0; n < ncases; n++)
        {   if (p[n] == u)
            {   targ = b.nthSucc(cast(int)(n + 1)).Boffset;
                break;
            }
        }
        static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
        {
            if (I64)
            {
                if (config.flags3 & CFG3pic)
                {
                    objmod.reftodatseg(jmpseg,*poffset,targ + (u - vmin) * 4,funcsym_p.Sseg,CFswitch);
                    *poffset += 4;
                }
                else
                {
                    objmod.reftodatseg(jmpseg,*poffset,targ,funcsym_p.Sxtrnnum,CFoffset64 | CFswitch);
                    *poffset += 8;
                }
            }
            else
            {
                if (config.flags3 & CFG3pic)
                {
                    assert(config.flags & CFGromable);
                    // Want a GOTPC fixup to _GLOBAL_OFFSET_TABLE_
                    if (!gotsym)
                        gotsym = Obj.getGOTsym();
                    objmod.reftoident(jmpseg,*poffset,gotsym,*poffset - targ,CFswitch);
                }
                else
                    objmod.reftocodeseg(jmpseg,*poffset,targ);
                *poffset += 4;
            }
        }
        else static if (TARGET_OSX)
        {
            targ_size_t val;
            if (I64)
                val = targ - b.Btableoffset;
            else
                val = targ - b.Btablebase;
            objmod.write_bytes(SegData[jmpseg],4,&val);
        }
        else static if (TARGET_WINDOS)
        {
            if (I64)
            {
                targ_size_t val = targ - b.Btableoffset;
                objmod.write_bytes(SegData[jmpseg],4,&val);
            }
            else
            {
                objmod.reftocodeseg(jmpseg,*poffset,targ);
                *poffset += tysize(TYnptr);
            }
        }
        else
            assert(0);

        if (u == vmax)                  // for case that (vmax == ~0)
            break;
    }
}


/******************************
 * Output data block for a switch table.
 * Two consecutive tables, the first is the case value table, the
 * second is the address table.
 */

void outswitab(block *b)
{
    //printf("outswitab()\n");
    targ_llong *p = b.Bswitch;          // pointer to case data
    uint ncases = cast(uint)*p++;       // number of cases

    const int seg = objmod.jmpTableSegment(funcsym_p);
    targ_size_t *poffset = &Offset(seg);
    targ_size_t offset = *poffset;      // remember start for the consistency asserts below
    targ_size_t alignbytes = _align(0,*poffset) - *poffset;
    objmod.lidata(seg,*poffset,alignbytes); // any alignment bytes necessary
    assert(*poffset == offset + alignbytes);

    uint sz = _tysize[TYint];
    assert(SegData[seg].SDseg == seg);
    for (uint n = 0; n < ncases; n++)   // send out value table
    {
        //printf("\tcase %d, offset = x%x\n", n, *poffset);
        objmod.write_bytes(SegData[seg],sz,p);
        p++;
    }
    offset += alignbytes + sz * ncases;
    assert(*poffset == offset);

    if (b.Btablesize == ncases * (REGSIZE * 2 + tysize(TYnptr)))
    {
        // Send out MSW table
        p -= ncases;
        for (uint n = 0; n < ncases; n++)
        {
            targ_size_t val = cast(targ_size_t)MSREG(*p);
            p++;
            objmod.write_bytes(SegData[seg],REGSIZE,&val);
        }
        offset += REGSIZE * ncases;
        assert(*poffset == offset);
    }

    list_t bl = b.Bsucc;
    for (uint n = 0; n < ncases; n++)   // send out address table
    {
        bl = list_next(bl);
        objmod.reftocodeseg(seg,*poffset,list_block(bl).Boffset);
        *poffset += tysize(TYnptr);
    }
    assert(*poffset == offset + ncases * tysize(TYnptr));
}

/*****************************
 * Return a jump opcode relevant to the elem for a JMP true.
 */

int jmpopcode(elem *e)
{
    tym_t tym;
    int zero,i,jp,op;
    // Jcc opcodes indexed by [signedness][compare-against-zero][relational op - OPle]
    static immutable ubyte[6][2][2] jops =
    [   /* <=  >   <   >=  ==  !=    <=0 >0  <0  >=0 ==0 !=0    */
       [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JS ,JNS,JE ,JNE] ], /* signed   */
       [ [JBE,JA ,JB ,JAE,JE ,JNE],[JE ,JNE,JB ,JAE,JE ,JNE] ], /* uint     */
/+
       [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JL ,JGE,JE ,JNE] ], /* real     */
       [ [JBE,JA ,JB ,JAE,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087     */
       [ [JA ,JBE,JAE,JB ,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087 R   */
+/
    ];

    enum
    {
        XP     = (JP  << 8),    // extra jump-if-parity (unordered/NaN) in the high byte
        XNP    = (JNP << 8),    // extra jump-if-no-parity in the high byte
    }
    // 8087 jump opcodes, indexed by [floating relational op - OPle]
    static immutable uint[26][1] jfops =
    /*   le     gt lt     ge  eqeq    ne     unord lg  leg  ule ul uge  */
    [
      [ XNP|JBE,JA,XNP|JB,JAE,XNP|JE, XP|JNE,JP,   JNE,JNP, JBE,JC,XP|JAE,

    /*  ug  ue  ngt nge nlt    nle    ord nlg nleg nule nul nuge    nug   nue */
        XP|JA,JE,JBE,JB, XP|JAE,XP|JA, JNP,JE, JP,  JA,  JNC,XNP|JB, XNP|JBE,JNE ], /* 8087 */
    ];

    assert(e);
    while (e.Eoper == OPcomma ||
           /* The OTleaf(e.EV.E1.Eoper) is to line up with the case in cdeq() where  */
           /* we decide if mPSW is passed on when evaluating E2 or not.
 */
           (e.Eoper == OPeq && OTleaf(e.EV.E1.Eoper)))
    {
        e = e.EV.E2;                    /* right operand determines it */
    }

    op = e.Eoper;
    tym_t tymx = tybasic(e.Ety);
    // Floating point compares that must also check the parity flag for NaN
    bool needsNanCheck = tyfloating(tymx) && config.inline8087 &&
        (tymx == TYldouble || tymx == TYildouble || tymx == TYcldouble ||
         tymx == TYcdouble || tymx == TYcfloat ||
         (tyxmmreg(tymx) && config.fpxmmregs && e.Ecount != e.Ecomsub) ||
         op == OPind ||
         (OTcall(op) && (regmask(tymx, tybasic(e.EV.E1.Eoper)) & (mST0 | XMMREGS))));
    if (e.Ecount != e.Ecomsub)          // comsubs just get Z bit set
    {
        if (needsNanCheck)              // except for floating point values that need a NaN check
            return XP|JNE;
        else
            return JNE;
    }
    if (!OTrel(op))                     // not relational operator
    {
        if (needsNanCheck)
            return XP|JNE;

        // Strip value-preserving unsigned widening conversions so bit-test ops show through
        if (op == OPu32_64) { e = e.EV.E1; op = e.Eoper; }
        if (op == OPu16_32) { e = e.EV.E1; op = e.Eoper; }
        if (op == OPu8_16) op = e.EV.E1.Eoper;
        return ((op >= OPbt && op <= OPbts) || op == OPbtst) ? JC : JNE;
    }

    if (e.EV.E2.Eoper == OPconst)
        zero = !boolres(e.EV.E2);
    else
        zero = 0;

    tym = e.EV.E1.Ety;
    if (tyfloating(tym))
    {
        static if (1)
        {
            i = 0;
            if (config.inline8087)
            {   i = 1;

                static if (1)
                {
                    if (rel_exception(op) || config.flags4 & CFG4fastfloat)
                    {
                        const bool NOSAHF = (I64 || config.fpxmmregs);
                        if (zero)
                        {
                            if (NOSAHF)
                                op = swaprel(op);
                        }
                        else if (NOSAHF)
                            op = swaprel(op);
                        else if (cmporder87(e.EV.E2))
                            op = swaprel(op);
                        else
                        { }
                    }
                    else
                    {
                        if (zero && config.target_cpu < TARGET_80386)
                        { }
                        else
                            op = swaprel(op);
                    }
                }
                else
                {
                    if (zero && !rel_exception(op) && config.target_cpu >= TARGET_80386)
                        op = swaprel(op);
                    else if (!zero &&
                        (cmporder87(e.EV.E2) || !(rel_exception(op) || config.flags4 & CFG4fastfloat)))
                        /* compare is reversed */
                        op = swaprel(op);
                }
            }
            jp = jfops[0][op - OPle];
            goto L1;
        }
        else
        {
            i = (config.inline8087) ? (3 + cmporder87(e.EV.E2)) : 2;
        }
    }
    else if (tyuns(tym) || tyuns(e.EV.E2.Ety))
        i = 1;                          // unsigned compare
    else if (tyintegral(tym) || typtr(tym))
        i = 0;                          // signed compare
    else
    {
        debug
        elem_print(e);
        WRTYxx(tym);
        assert(0);
    }

    jp = jops[i][zero][op - OPle];      /* table starts with OPle */

    /* Try to rewrite uint comparisons so they rely on just the Carry flag
     */
    if (i == 1 && (jp == JA || jp == JBE) &&
        (e.EV.E2.Eoper != OPconst && e.EV.E2.Eoper != OPrelconst))
    {
        jp = (jp == JA) ?
JC : JNC;
    }

L1:
    debug
    if ((jp & 0xF0) != 0x70)
    {
        WROP(op);
        printf("i %d zero %d op x%x jp x%x\n",i,zero,op,jp);
    }

    assert((jp & 0xF0) == 0x70);        // result must be a Jcc short-jump opcode
    return jp;
}

/**********************************
 * Append code to cdb which validates pointer described by
 * addressing mode in *pcs. Modify addressing mode in *pcs.
 * Params:
 *      cdb = append generated code to this
 *      pcs = original addressing mode to be updated
 *      keepmsk = mask of registers we must not destroy or use
 *              if (keepmsk & RMstore), this will be only a store operation
 *              into the lvalue
 */

void cod3_ptrchk(ref CodeBuilder cdb,code *pcs,regm_t keepmsk)
{
    ubyte sib;
    reg_t reg;
    uint flagsave;

    assert(!I64);                       // 16/32 bit only
    if (!I16 && pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
        return;         // not designed to deal with 48 bit far pointers

    ubyte rm = pcs.Irm;
    assert(!(rm & 0x40));       // no disp8 or reg addressing modes

    // If the addressing mode is already a register
    reg = rm & 7;
    if (I16)
    {   static immutable ubyte[8] imode = [ BP,BP,BP,BP,SI,DI,BP,BX ];

        reg = imode[reg];               // convert [SI] to SI, etc.
    }
    regm_t idxregs = mask(reg);
    // If there is a displacement, or the index is not a usable general
    // register, materialize the full address into a register first.
    if ((rm & 0x80 && (pcs.IFL1 != FLoffset || pcs.IEV1.Vuns)) ||
        !(idxregs & ALLREGS)
       )
    {
        // Load the offset into a register, so we can push the address
        regm_t idxregs2 = (I16 ? IDXREGS : ALLREGS) & ~keepmsk; // only these can be index regs
        assert(idxregs2);
        allocreg(cdb,&idxregs2,&reg,TYoffset);

        const opsave = pcs.Iop;
        flagsave = pcs.Iflags;
        pcs.Iop = LEA;
        pcs.Irm |= modregrm(0,reg,0);
        pcs.Iflags &= ~(CFopsize | CFss | CFes | CFcs);        // no prefix bytes needed
        cdb.gen(pcs);                   // LEA reg,EA

        pcs.Iflags = flagsave;
        pcs.Iop = opsave;
    }

    // registers destroyed by the function call
    //used = (mBP | ALLREGS | mES) & ~fregsaved;
    regm_t used = 0;                    // much less code generated this way

    // Save/restore any registers that are both live (keepmsk/idxregs)
    // and destroyed by the call; pushes go in cdb, matching pops in cs2.
    code *cs2 = null;
    regm_t tosave = used & (keepmsk | idxregs);
    for (int i = 0; tosave; i++)
    {
        regm_t mi = mask(i);

        assert(i < REGMAX);
        if (mi & tosave)        /* i = register to save         */
        {
            int push,pop;

            stackchanged = 1;
            if (i == ES)
            {   push = 0x06;            // PUSH ES
                pop = 0x07;             // POP  ES
            }
            else
            {   push = 0x50 + i;        // PUSH r16/r32
                pop = push | 8;         // POP  r16/r32
            }
            cdb.gen1(push);                     // PUSH i
            cs2 = cat(gen1(null,pop),cs2);      // POP i
            tosave &= ~mi;
        }
    }

    // For 16 bit models, push a far pointer
    if (I16)
    {
        int segreg;

        switch (pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs))
        {   case CFes:  segreg = 0x06;  break;  // PUSH ES
            case CFss:  segreg = 0x16;  break;  // PUSH SS
            case CFcs:  segreg = 0x0E;  break;  // PUSH CS
            case 0:     segreg = 0x1E;  break;  // PUSH DS
            default:
                assert(0);
        }

        // See if we should default to SS:
        // (Happens when BP is part of the addressing mode)
        if (segreg == 0x1E && (rm & 0xC0) != 0xC0 &&
            rm & 2 && (rm & 7) != 7)
        {
            segreg = 0x16;
            if (config.wflags & WFssneds)
                pcs.Iflags |= CFss;     // because BP won't be there anymore
        }
        cdb.gen1(segreg);               // PUSH segreg
    }

    cdb.gen1(0x50 + reg);               // PUSH reg

    // Rewrite the addressing mode in *pcs so it is just 0[reg]
    setaddrmode(pcs, idxregs);
    pcs.IFL1 = FLoffset;
    pcs.IEV1.Vuns = 0;

    // Call the validation function
    {
        makeitextern(getRtlsym(RTLSYM_PTRCHK));

        used &= ~(keepmsk | idxregs);   // regs destroyed by this exercise
        getregs(cdb,used);
        // CALL __ptrchk
        cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM_PTRCHK));
    }

    cdb.append(cs2);                    // restore saved registers
}

/***********************************
 * Determine if BP can be used as a general purpose register.
 * Note parallels between this routine and prolog().
 * Returns:
 *      0       can't be used, needed for frame
 *      mBP     can be used
 */

regm_t cod3_useBP()
{
    tym_t tym;
    tym_t tyf;

    // Note that DOSX memory model cannot use EBP as a general purpose
    // register, as SS != DS.
    if (!(config.exe & EX_flat) || config.flags & (CFGalwaysframe | CFGnoebp))
        goto Lcant;

    if (anyiasm)                        // inline asm may reference BP
        goto Lcant;

    tyf = funcsym_p.ty();
    if (tyf & mTYnaked)                 // if no prolog/epilog for function
        goto Lcant;

    if (funcsym_p.Sfunc.Fflags3 & Ffakeeh)
    {
        goto Lcant;                     // need consistent stack frame
    }

    tym = tybasic(tyf);
    if (tym == TYifunc)                 // interrupt functions need a frame
        goto Lcant;

    stackoffsets(0);
    localsize = Auto.offset + Fast.offset;              // an estimate only
//    if (localsize)
    {
        if (!(config.flags4 & CFG4speed) ||
            config.target_cpu < TARGET_Pentium ||
            tyfarfunc(tym) ||
            config.flags & CFGstack ||
            localsize >= 0x100 ||       // arbitrary value < 0x1000
            (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) ||
            calledFinally ||
            Alloca.size
           )
            goto Lcant;
    }
    return mBP;

Lcant:
    return 0;
}

/*************************************************
 * Generate code segment to be used later to restore a cse
 */

bool cse_simple(code *c, elem *e)
{
    regm_t regm;
    reg_t reg;
    int sz = tysize(e.Ety);

    if (!I16
&&                                      // don't bother with 16 bit code
        e.Eoper == OPadd &&
        sz == REGSIZE &&
        e.EV.E2.Eoper == OPconst &&
        e.EV.E1.Eoper == OPvar &&
        isregvar(e.EV.E1,&regm,&reg) &&
        !(e.EV.E1.EV.Vsym.Sflags & SFLspill)
       )
    {
        memset(c,0,(*c).sizeof);

        // Make this an LEA instruction
        c.Iop = LEA;
        buildEA(c,reg,-1,1,e.EV.E2.EV.Vuns);
        if (I64)
        {   if (sz == 8)
                c.Irex |= REX_W;
        }

        return true;
    }
    else if (e.Eoper == OPind &&
        sz <= REGSIZE &&
        e.EV.E1.Eoper == OPvar &&
        isregvar(e.EV.E1,&regm,&reg) &&
        (I32 || I64 || regm & IDXREGS) &&
        !(e.EV.E1.EV.Vsym.Sflags & SFLspill)
       )
    {
        memset(c,0,(*c).sizeof);

        // Make this a MOV instruction
        c.Iop = (sz == 1) ? 0x8A : 0x8B;        // MOV reg,EA
        buildEA(c,reg,-1,1,0);
        if (sz == 2 && I32)
            c.Iflags |= CFopsize;
        else if (I64)
        {   if (sz == 8)
                c.Irex |= REX_W;
        }

        return true;
    }
    return false;
}

/**************************
 * Store `reg` to the common subexpression save area in index `slot`.
 * Params:
 *      cdb = where to write code to
 *      tym = type of value that's in `reg`
 *      reg = register to save
 *      slot = index into common subexpression save area
 */
void gen_storecse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot)
{
    // MOV slot[BP],reg
    if (isXMMreg(reg) && config.fpxmmregs) // watch out for ES
    {
        const aligned = tyvector(tym) ? STACKALIGN >= 16 : true;
        const op = xmmstore(tym, aligned);
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot);
        return;
    }
    opcode_t op = STO;                  // normal mov
    if (reg == ES)
    {
        reg = 0;                        // the real reg number
        op = 0x8C;                      // segment reg mov
    }
    cdb.genc1(op,modregxrm(2, reg, BPRM),FLcs,cast(targ_uns)slot);
    if (I64)
        code_orrex(cdb.last(), REX_W);
}

/**************************
 * Compare the common subexpression in save area index `slot` against 0,
 * setting the condition flags.
 * Params:
 *      cdb = where to write code to
 *      tym = type of the saved value
 *      sz = size in bytes of the saved value
 *      slot = index into common subexpression save area
 */
void gen_testcse(ref CodeBuilder cdb, tym_t tym, uint sz, size_t slot)
{
    // CMP slot[BP],0
    cdb.genc(sz == 1 ? 0x80 : 0x81,modregrm(2,7,BPRM),
                FLcs,cast(targ_uns)slot, FLconst,cast(targ_uns) 0);
    if ((I64 || I32) && sz == 2)
        cdb.last().Iflags |= CFopsize;  // force 16 bit operand size
    if (I64 && sz == 8)
        code_orrex(cdb.last(), REX_W);
}

/**************************
 * Reload `reg` from the common subexpression save area at index `slot`.
 * Inverse of gen_storecse().
 * Params:
 *      cdb = where to write code to
 *      tym = type of value to load
 *      reg = register to load into
 *      slot = index into common subexpression save area
 */
void gen_loadcse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot)
{
    // MOV reg,slot[BP]
    if (isXMMreg(reg) && config.fpxmmregs)
    {
        const aligned = tyvector(tym) ? STACKALIGN >= 16 : true;
        const op = xmmload(tym, aligned);
        cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot);
        return;
    }
    opcode_t op = LOD;
    if (reg == ES)
    {
        op = 0x8E;                      // segment reg mov
        reg = 0;
    }
    cdb.genc1(op,modregxrm(2,reg,BPRM),FLcs,cast(targ_uns)slot);
    if (I64)
        code_orrex(cdb.last(), REX_W);
}

/***************************************
 * Gen code for OPframeptr
 */

void cdframeptr(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    regm_t retregs = *pretregs & allregs;
    if (!retregs)
        retregs = allregs;
    reg_t reg;
    allocreg(cdb,&retregs, &reg, TYint);

    // ESCAPE pseudo-op; resolved to the frame pointer value later
    code cs;
    cs.Iop = ESCAPE | ESCframeptr;
    cs.Iflags = 0;
    cs.Irex = 0;
    cs.Irm = cast(ubyte)reg;
    cdb.gen(&cs);
    fixresult(cdb,e,retregs,pretregs);
}

/***************************************
 * Gen code for load of _GLOBAL_OFFSET_TABLE_.
 * This value gets cached in the local variable 'localgot'.
 */

void cdgot(ref CodeBuilder cdb, elem *e, regm_t *pretregs)
{
    static if (TARGET_OSX)
    {
        regm_t retregs = *pretregs & allregs;
        if (!retregs)
            retregs = allregs;
        reg_t reg;
        allocreg(cdb,&retregs, &reg, TYnptr);

        cdb.genc(CALL,0,0,0,FLgot,0);   //     CALL L1
        cdb.gen1(0x58 + reg);           // L1: POP reg

        fixresult(cdb,e,retregs,pretregs);
    }
    else static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
    {
        regm_t retregs = *pretregs & allregs;
        if (!retregs)
            retregs = allregs;
        reg_t reg;
        allocreg(cdb,&retregs, &reg, TYnptr);

        cdb.genc2(CALL,0,0);            //     CALL L1
        cdb.gen1(0x58 + reg);           // L1: POP reg

        // ADD reg,_GLOBAL_OFFSET_TABLE_+3
        Symbol *gotsym = Obj.getGOTsym();
        cdb.gencs(0x81,modregrm(3,0,reg),FLextern,gotsym);
        /* Because the 2:3 offset from L1: is hardcoded,
         * this sequence of instructions must not
         * have any instructions in between,
         * so set CFvolatile to prevent the scheduler from rearranging it.
         */
        code *cgot = cdb.last();
        cgot.Iflags = CFoff | CFvolatile;
        cgot.IEV2.Voffset = (reg == AX) ? 2 : 3;

        makeitextern(gotsym);
        fixresult(cdb,e,retregs,pretregs);
    }
    else
        assert(0);
}

/**************************************************
 * Load contents of localgot into EBX.
 */

void load_localgot(ref CodeBuilder cdb)
{
    static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
    {
        if (config.flags3 & CFG3pic && I32)
        {
            if (localgot && !(localgot.Sflags & SFLdead))
            {
                localgot.Sflags &= ~GTregcand;  // because this hack doesn't work with reg allocator
                elem *e = el_var(localgot);
                regm_t retregs = mBX;
                codelem(cdb,e,&retregs,false);
                el_free(e);
            }
            else
            {
                // no cached copy; compute the GOT address directly
                elem *e = el_long(TYnptr, 0);
                e.Eoper = OPgot;
                regm_t retregs = mBX;
                codelem(cdb,e,&retregs,false);
                el_free(e);
            }
        }
    }
}

static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
{
/*****************************
 * Write a length-prefixed name string to p.
 * Lengths > 255 use a 0xFF,0,length16 escape prefix.
 * Returns:
 *      # of bytes stored
 */


private int obj_namestring(char *p,const(char)* name)
{
    size_t len = strlen(name);
    if (len > 255)
    {
        short *ps = cast(short *)p;
        p[0] = 0xFF;
        p[1] = 0;
        ps[1] = cast(short)len;
        memcpy(p + 4,name,len);
        const int ONS_OHD = 4;          // max # of extra bytes added by obj_namestring()
        len += ONS_OHD;
    }
    else
    {
        p[0] = cast(char)len;
        memcpy(p + 1,name,len);
        len++;
    }
    return cast(int)len;
}
}

/*****************************
 * Generate a reg-reg instruction (mod field 3).
 */
void genregs(ref CodeBuilder cdb,opcode_t op,uint dstreg,uint srcreg)
{
    return cdb.gen2(op,modregxrmx(3,dstreg,srcreg));
}

/*****************************
 * Generate TEST t,t to set the flags from register t.
 */
void gentstreg(ref CodeBuilder cdb, uint t)
{
    cdb.gen2(0x85,modregxrmx(3,t,t));   // TEST t,t
    code_orflag(cdb.last(),CFpsw);      // result flags are live
}

/*****************************
 * Generate PUSH reg, with REX.B for R8..R15.
 */
void genpush(ref CodeBuilder cdb, reg_t reg)
{
    cdb.gen1(0x50 + (reg & 7));
    if (reg & 8)
        code_orrex(cdb.last(), REX_B);
}

/*****************************
 * Generate POP reg, with REX.B for R8..R15.
 */
void genpop(ref CodeBuilder cdb, reg_t reg)
{
    cdb.gen1(0x58 + (reg & 7));
    if (reg & 8)
        code_orrex(cdb.last(), REX_B);
}

/**************************
 * Generate a MOV to,from register instruction.
 * Smart enough to dump redundant register moves, and segment
 * register moves.
 */

code *genmovreg(uint to,uint from)
{
    CodeBuilder cdb; cdb.ctor();
    genmovreg(cdb, to, from);
    return cdb.finish();
}

void genmovreg(ref CodeBuilder cdb,uint to,uint from)
{
    // TYMAX means "size unknown"; the 3-arg overload substitutes TYsize_t
    genmovreg(cdb, to, from, TYMAX);
}

void genmovreg(ref CodeBuilder cdb, uint to, uint from, tym_t tym)
{
    // register kind. ex: GPR,XMM,SEG
    static uint _K(uint reg)
    {
        switch (reg)
        {
            case ES:                   return ES;
            case XMM15:
            case XMM0: .. case XMM7:   return XMM0;
            case AX:   .. case R15:    return AX;
            default:                   return reg;
        }
    }

    // kind combination (order kept)
    static uint _X(uint to, uint from) { return (_K(to) << 8) + _K(from); }

    if (to != from)
    {
        if (tym == TYMAX) tym = TYsize_t; // avoid register slicing
        switch (_X(to, from))
        {
            case _X(AX, AX):                // GPR <- GPR
                genregs(cdb, 0x89, from, to);    // MOV to,from
                if (I64 && tysize(tym) >= 8)
                    code_orrex(cdb.last(), REX_W);
                break;

            case _X(XMM0, XMM0):            // MOVD/Q to,from
                genregs(cdb, xmmload(tym), to-XMM0, from-XMM0);
                checkSetVex(cdb.last(), tym);
                break;

            case _X(AX, XMM0):              // MOVD/Q to,from
                genregs(cdb, STOD, from-XMM0, to);
                if (I64 && tysize(tym) >= 8)
                    code_orrex(cdb.last(), REX_W);
                checkSetVex(cdb.last(), tym);
                break;

            case _X(XMM0, AX):              // MOVD/Q to,from
                genregs(cdb, LODD, to-XMM0, from);
                if (I64 && tysize(tym) >= 8)
                    code_orrex(cdb.last(), REX_W);
                checkSetVex(cdb.last(), tym);
                break;

            case _X(ES, AX):                // segment reg <- GPR
                assert(tysize(tym) <= REGSIZE);
                genregs(cdb, 0x8E, 0, from);
                break;

            case _X(AX, ES):                // GPR <- segment reg
                assert(tysize(tym) <= REGSIZE);
                genregs(cdb, 0x8C, 0, to);
                break;

            default:
                debug printf("genmovreg(to = %s, from = %s)\n"
                    , regm_str(mask(to)), regm_str(mask(from)));
                assert(0);
        }
    }
}

/***************************************
 * Generate immediate multiply instruction for r1=r2*imm.
 * Optimize it into LEA's if we can.
 */

void genmulimm(ref CodeBuilder cdb,uint r1,uint r2,targ_int imm)
{
    // These optimizations should probably be put into pinholeopt()
    switch (imm)
    {
        case 1:                         // r1 = r2
            genmovreg(cdb,r1,r2);
            break;

        case 5:                         // LEA r1,[r2+r2*4]
        {
            code cs;
            cs.Iop = LEA;
            cs.Iflags = 0;
            cs.Irex = 0;
            buildEA(&cs,r2,r2,4,0);
            cs.orReg(r1);
            cdb.gen(&cs);
            break;
        }

        default:
            cdb.genc2(0x69,modregxrmx(3,r1,r2),imm);    // IMUL r1,r2,imm
            break;
    }
}

/******************************
 * Load CX with the value of _AHSHIFT.
 */

void genshift(ref CodeBuilder cdb)
{
    version (SCPP)
    {
        // Set up ahshift to trick ourselves into giving the right fixup,
        // which must be seg-relative, external frame, external target.
        cdb.gencs(0xC7,modregrm(3,0,CX),FLfunc,getRtlsym(RTLSYM_AHSHIFT));
        cdb.last().Iflags |= CFoff;
    }
    else
        assert(0);          // SCPP only
}

/******************************
 * Move constant value into reg.
 * Take advantage of existing values in registers.
3025 * If flags & mPSW 3026 * set flags based on result 3027 * Else if flags & 8 3028 * do not disturb flags 3029 * Else 3030 * don't care about flags 3031 * If flags & 1 then byte move 3032 * If flags & 2 then short move (for I32 and I64) 3033 * If flags & 4 then don't disturb unused portion of register 3034 * If flags & 16 then reg is a byte register AL..BH 3035 * If flags & 64 (0x40) then 64 bit move (I64 only) 3036 * Returns: 3037 * code (if any) generated 3038 */ 3039 3040 void movregconst(ref CodeBuilder cdb,reg_t reg,targ_size_t value,regm_t flags) 3041 { 3042 reg_t r; 3043 regm_t mreg; 3044 3045 //printf("movregconst(reg=%s, value= %lld (%llx), flags=%x)\n", regm_str(mask(reg)), value, value, flags); 3046 3047 regm_t regm = regcon.immed.mval & mask(reg); 3048 targ_size_t regv = regcon.immed.value[reg]; 3049 3050 if (flags & 1) // 8 bits 3051 { 3052 value &= 0xFF; 3053 regm &= BYTEREGS; 3054 3055 // If we already have the right value in the right register 3056 if (regm && (regv & 0xFF) == value) 3057 goto L2; 3058 3059 if (flags & 16 && reg & 4 && // if an H byte register 3060 regcon.immed.mval & mask(reg & 3) && 3061 (((regv = regcon.immed.value[reg & 3]) >> 8) & 0xFF) == value) 3062 goto L2; 3063 3064 /* Avoid byte register loads to avoid dependency stalls. 
3065 */ 3066 if ((I32 || I64) && 3067 config.target_cpu >= TARGET_PentiumPro && !(flags & 4)) 3068 goto L3; 3069 3070 // See if another register has the right value 3071 r = 0; 3072 for (mreg = (regcon.immed.mval & BYTEREGS); mreg; mreg >>= 1) 3073 { 3074 if (mreg & 1) 3075 { 3076 if ((regcon.immed.value[r] & 0xFF) == value) 3077 { 3078 genregs(cdb,0x8A,reg,r); // MOV regL,rL 3079 if (I64 && reg >= 4 || r >= 4) 3080 code_orrex(cdb.last(), REX); 3081 goto L2; 3082 } 3083 if (!(I64 && reg >= 4) && 3084 r < 4 && ((regcon.immed.value[r] >> 8) & 0xFF) == value) 3085 { 3086 genregs(cdb,0x8A,reg,r | 4); // MOV regL,rH 3087 goto L2; 3088 } 3089 } 3090 r++; 3091 } 3092 3093 if (value == 0 && !(flags & 8)) 3094 { 3095 if (!(flags & 4) && // if we can set the whole register 3096 !(flags & 16 && reg & 4)) // and reg is not an H register 3097 { 3098 genregs(cdb,0x31,reg,reg); // XOR reg,reg 3099 regimmed_set(reg,value); 3100 regv = 0; 3101 } 3102 else 3103 genregs(cdb,0x30,reg,reg); // XOR regL,regL 3104 flags &= ~mPSW; // flags already set by XOR 3105 } 3106 else 3107 { 3108 cdb.genc2(0xC6,modregrmx(3,0,reg),value); // MOV regL,value 3109 if (reg >= 4 && I64) 3110 { 3111 code_orrex(cdb.last(), REX); 3112 } 3113 } 3114 L2: 3115 if (flags & mPSW) 3116 genregs(cdb,0x84,reg,reg); // TEST regL,regL 3117 3118 if (regm) 3119 // Set just the 'L' part of the register value 3120 regimmed_set(reg,(regv & ~cast(targ_size_t)0xFF) | value); 3121 else if (flags & 16 && reg & 4 && regcon.immed.mval & mask(reg & 3)) 3122 // Set just the 'H' part of the register value 3123 regimmed_set((reg & 3),(regv & ~cast(targ_size_t)0xFF00) | (value << 8)); 3124 return; 3125 } 3126 L3: 3127 if (I16) 3128 value = cast(targ_short) value; // sign-extend MSW 3129 else if (I32) 3130 value = cast(targ_int) value; 3131 3132 if (!I16 && flags & 2) // load 16 bit value 3133 { 3134 value &= 0xFFFF; 3135 if (value && !(flags & mPSW)) 3136 { 3137 cdb.genc2(0xC7,modregrmx(3,0,reg),value); // MOV reg,value 3138 
regimmed_set(reg, value); 3139 return; 3140 } 3141 } 3142 3143 // If we already have the right value in the right register 3144 if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64)) 3145 { 3146 if (flags & mPSW) 3147 gentstreg(cdb,reg); 3148 } 3149 else if (flags & 64 && regm && regv == value) 3150 { // Look at the full 64 bits 3151 if (flags & mPSW) 3152 { 3153 gentstreg(cdb,reg); 3154 code_orrex(cdb.last(), REX_W); 3155 } 3156 } 3157 else 3158 { 3159 if (flags & mPSW) 3160 { 3161 switch (value) 3162 { 3163 case 0: 3164 genregs(cdb,0x31,reg,reg); 3165 break; 3166 3167 case 1: 3168 if (I64) 3169 goto L4; 3170 genregs(cdb,0x31,reg,reg); 3171 goto inc; 3172 3173 case ~cast(targ_size_t)0: 3174 if (I64) 3175 goto L4; 3176 genregs(cdb,0x31,reg,reg); 3177 goto dec; 3178 3179 default: 3180 L4: 3181 if (flags & 64) 3182 { 3183 cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64 3184 gentstreg(cdb,reg); 3185 code_orrex(cdb.last(), REX_W); 3186 } 3187 else 3188 { 3189 value &= 0xFFFFFFFF; 3190 cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value 3191 gentstreg(cdb,reg); 3192 } 3193 break; 3194 } 3195 } 3196 else 3197 { 3198 // Look for single byte conversion 3199 if (regcon.immed.mval & mAX) 3200 { 3201 if (I32) 3202 { 3203 if (reg == AX && value == cast(targ_short) regv) 3204 { 3205 cdb.gen1(0x98); // CWDE 3206 goto done; 3207 } 3208 if (reg == DX && 3209 value == (regcon.immed.value[AX] & 0x80000000 ? 0xFFFFFFFF : 0) && 3210 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium) 3211 ) 3212 { 3213 cdb.gen1(0x99); // CDQ 3214 goto done; 3215 } 3216 } 3217 else if (I16) 3218 { 3219 if (reg == AX && 3220 cast(targ_short) value == cast(byte) regv) 3221 { 3222 cdb.gen1(0x98); // CBW 3223 goto done; 3224 } 3225 3226 if (reg == DX && 3227 cast(targ_short) value == (regcon.immed.value[AX] & 0x8000 ? 
cast(targ_short) 0xFFFF : cast(targ_short) 0) && 3228 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium) 3229 ) 3230 { 3231 cdb.gen1(0x99); // CWD 3232 goto done; 3233 } 3234 } 3235 } 3236 if (value == 0 && !(flags & 8) && config.target_cpu >= TARGET_80486) 3237 { 3238 genregs(cdb,0x31,reg,reg); // XOR reg,reg 3239 goto done; 3240 } 3241 3242 if (!I64 && regm && !(flags & 8)) 3243 { 3244 if (regv + 1 == value || 3245 // Catch case of (0xFFFF+1 == 0) for 16 bit compiles 3246 (I16 && cast(targ_short)(regv + 1) == cast(targ_short)value)) 3247 { 3248 inc: 3249 cdb.gen1(0x40 + reg); // INC reg 3250 goto done; 3251 } 3252 if (regv - 1 == value) 3253 { 3254 dec: 3255 cdb.gen1(0x48 + reg); // DEC reg 3256 goto done; 3257 } 3258 } 3259 3260 // See if another register has the right value 3261 r = 0; 3262 for (mreg = regcon.immed.mval; mreg; mreg >>= 1) 3263 { 3264 debug 3265 assert(!I16 || regcon.immed.value[r] == cast(targ_short)regcon.immed.value[r]); 3266 3267 if (mreg & 1 && regcon.immed.value[r] == value) 3268 { 3269 genmovreg(cdb,reg,r); 3270 goto done; 3271 } 3272 r++; 3273 } 3274 3275 if (value == 0 && !(flags & 8)) 3276 { 3277 genregs(cdb,0x31,reg,reg); // XOR reg,reg 3278 } 3279 else 3280 { // See if we can just load a byte 3281 if (regm & BYTEREGS && 3282 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_PentiumPro) 3283 ) 3284 { 3285 if ((regv & ~cast(targ_size_t)0xFF) == (value & ~cast(targ_size_t)0xFF)) 3286 { 3287 movregconst(cdb,reg,value,(flags & 8) |4|1); // load regL 3288 return; 3289 } 3290 if (regm & (mAX|mBX|mCX|mDX) && 3291 (regv & ~cast(targ_size_t)0xFF00) == (value & ~cast(targ_size_t)0xFF00) && 3292 !I64) 3293 { 3294 movregconst(cdb,4|reg,value >> 8,(flags & 8) |4|1|16); // load regH 3295 return; 3296 } 3297 } 3298 if (flags & 64) 3299 cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64 3300 else 3301 { 3302 value &= 0xFFFFFFFF; 3303 cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value 
3304 } 3305 } 3306 } 3307 done: 3308 regimmed_set(reg,value); 3309 } 3310 } 3311 3312 /************************** 3313 * Generate a jump instruction. 3314 */ 3315 3316 void genjmp(ref CodeBuilder cdb,opcode_t op,uint fltarg,block *targ) 3317 { 3318 code cs; 3319 cs.Iop = op & 0xFF; 3320 cs.Iflags = 0; 3321 cs.Irex = 0; 3322 if (op != JMP && op != 0xE8) // if not already long branch 3323 cs.Iflags = CFjmp16; // assume long branch for op = 0x7x 3324 cs.IFL2 = cast(ubyte)fltarg; // FLblock (or FLcode) 3325 cs.IEV2.Vblock = targ; // target block (or code) 3326 if (fltarg == FLcode) 3327 (cast(code *)targ).Iflags |= CFtarg; 3328 3329 if (config.flags4 & CFG4fastfloat) // if fast floating point 3330 { 3331 cdb.gen(&cs); 3332 return; 3333 } 3334 3335 switch (op & 0xFF00) // look at second jump opcode 3336 { 3337 // The JP and JNP come from floating point comparisons 3338 case JP << 8: 3339 cdb.gen(&cs); 3340 cs.Iop = JP; 3341 cdb.gen(&cs); 3342 break; 3343 3344 case JNP << 8: 3345 { 3346 // Do a JP around the jump instruction 3347 code *cnop = gennop(null); 3348 genjmp(cdb,JP,FLcode,cast(block *) cnop); 3349 cdb.gen(&cs); 3350 cdb.append(cnop); 3351 break; 3352 } 3353 3354 case 1 << 8: // toggled no jump 3355 case 0 << 8: 3356 cdb.gen(&cs); 3357 break; 3358 3359 default: 3360 debug 3361 printf("jop = x%x\n",op); 3362 assert(0); 3363 } 3364 } 3365 3366 /********************************************* 3367 * Generate first part of prolog for interrupt function. 3368 */ 3369 void prolog_ifunc(ref CodeBuilder cdb, tym_t* tyf) 3370 { 3371 static immutable ubyte[4] ops2 = [ 0x60,0x1E,0x06,0 ]; 3372 static immutable ubyte[11] ops0 = [ 0x50,0x51,0x52,0x53, 3373 0x54,0x55,0x56,0x57, 3374 0x1E,0x06,0 ]; 3375 3376 immutable(ubyte)* p = (config.target_cpu >= TARGET_80286) ? 
ops2.ptr : ops0.ptr;
    do
        cdb.gen1(*p);
    while (*++p);

    genregs(cdb,0x8B,BP,SP);                            // MOV BP,SP
    if (localsize)
        cod3_stackadj(cdb, cast(int)localsize);

    *tyf |= mTYloadds;                  // tell epilog DS was loaded
}

/*********************************************
 * Generate second part of prolog for interrupt function:
 * reload DS if needed, and CLD for TYifunc.
 */
void prolog_ifunc2(ref CodeBuilder cdb, tym_t tyf, tym_t tym, bool pushds)
{
    /* Determine if we need to reload DS        */
    if (tyf & mTYloadds)
    {
        if (!pushds)                            // if not already pushed
            cdb.gen1(0x1E);                     // PUSH DS
        spoff += _tysize[TYint];
        cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0); // MOV  AX,DGROUP
        code *c = cdb.last();
        c.IEV2.Vseg = DATA;
        c.Iflags ^= CFseg | CFoff;              // turn off CFoff, on CFseg
        cdb.gen2(0x8E,modregrm(3,3,AX));        // MOV  DS,AX
        useregs(mAX);
    }

    if (tym == TYifunc)
        cdb.gen1(0xFC);                         // CLD
}

/*********************************************
 * Generate prolog for a 16 bit Windows far function:
 * segment-register fixup sequence, then the standard frame setup.
 */
void prolog_16bit_windows_farfunc(ref CodeBuilder cdb, tym_t* tyf, bool* pushds)
{
    int wflags = config.wflags;
    if (wflags & WFreduced && !(*tyf & mTYexport))
    {   // reduced prolog/epilog for non-exported functions
        wflags &= ~(WFdgroup | WFds | WFss);
    }

    getregsNoSave(mAX);                 // should not have any value in AX

    int segreg;
    switch (wflags & (WFdgroup | WFds | WFss))
    {
        case WFdgroup:                  // MOV  AX,DGROUP
        {
            if (wflags & WFreduced)
                *tyf &= ~mTYloadds;     // remove redundancy
            cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0);
            code *c = cdb.last();
            c.IEV2.Vseg = DATA;
            c.Iflags ^= CFseg | CFoff;  // turn off CFoff, on CFseg
            break;
        }

        case WFss:
            segreg = 2;                 // SS
            goto Lmovax;

        case WFds:
            segreg = 3;                 // DS
        Lmovax:
            cdb.gen2(0x8C,modregrm(3,segreg,AX));   // MOV AX,segreg
            if (wflags & WFds)
                cdb.gen1(0x90);         // NOP
            break;

        case 0:
            break;

        default:
            debug
            printf("config.wflags = x%x\n",config.wflags);
            assert(0);
    }
    if (wflags & WFincbp)
        cdb.gen1(0x40 + BP);            // INC  BP
    cdb.gen1(0x50 + BP);                // PUSH BP
    genregs(cdb,0x8B,BP,SP);            // MOV  BP,SP
    if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
    {
        cdb.gen1(0x1E);                 // PUSH DS
        *pushds = true;
        BPoff = -REGSIZE;
    }
    if (wflags & (WFds | WFss | WFdgroup))
        cdb.gen2(0x8E,modregrm(3,3,AX));        // MOV  DS,AX
}

/**********************************************
 * Set up frame register.
 * Params:
 *      cdb = write generated code here
 *      farfunc = true if a far function
 *      enter = set to true if ENTER instruction can be used, false otherwise
 *      xlocalsize = amount of local variables, set to amount to be subtracted from stack pointer
 *      cfa_offset = set to frame pointer's offset from the CFA
 * Returns:
 *      generated code
 */
void prolog_frame(ref CodeBuilder cdb, bool farfunc, ref uint xlocalsize, out bool enter, out int cfa_offset)
{
    //printf("prolog_frame\n");
    cfa_offset = 0;

    if (0 && config.exe == EX_WIN64)    // dead code, kept for reference
    {
        // PUSH RBP
        // LEA RBP,0[RSP]
        cdb.
gen1(0x50 + BP); 3487 cdb.genc1(LEA,(REX_W<<16) | (modregrm(0,4,SP)<<8) | modregrm(2,BP,4),FLconst,0); 3488 enter = false; 3489 return; 3490 } 3491 3492 if (config.wflags & WFincbp && farfunc) 3493 cdb.gen1(0x40 + BP); // INC BP 3494 if (config.target_cpu < TARGET_80286 || 3495 config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_OSX64 | EX_FREEBSD | EX_FREEBSD64 | EX_DRAGONFLYBSD64 | EX_SOLARIS | EX_SOLARIS64 | EX_WIN64) || 3496 !localsize || 3497 config.flags & CFGstack || 3498 (xlocalsize >= 0x1000 && config.exe & EX_flat) || 3499 localsize >= 0x10000 || 3500 (NTEXCEPTIONS == 2 && 3501 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))) || 3502 (config.target_cpu >= TARGET_80386 && 3503 config.flags4 & CFG4speed) 3504 ) 3505 { 3506 cdb.gen1(0x50 + BP); // PUSH BP 3507 genregs(cdb,0x8B,BP,SP); // MOV BP,SP 3508 if (I64) 3509 code_orrex(cdb.last(), REX_W); // MOV RBP,RSP 3510 if ((config.objfmt & (OBJ_ELF | OBJ_MACH)) && config.fulltypes) 3511 // Don't reorder instructions, as dwarf CFA relies on it 3512 code_orflag(cdb.last(), CFvolatile); 3513 static if (NTEXCEPTIONS == 2) 3514 { 3515 if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH)) 3516 { 3517 nteh_prolog(cdb); 3518 int sz = nteh_contextsym_size(); 3519 assert(sz != 0); // should be 5*4, not 0 3520 xlocalsize -= sz; // sz is already subtracted from ESP 3521 // by nteh_prolog() 3522 } 3523 } 3524 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D || 3525 config.ehmethod == EHmethod.EH_DWARF) 3526 { 3527 int off = 2 * REGSIZE; // 1 for the return address + 1 for the PUSH EBP 3528 dwarf_CFA_set_loc(1); // address after PUSH EBP 3529 dwarf_CFA_set_reg_offset(SP, off); // CFA is now 8[ESP] 3530 
dwarf_CFA_offset(BP, -off); // EBP is at 0[ESP] 3531 dwarf_CFA_set_loc(I64 ? 4 : 3); // address after MOV EBP,ESP 3532 /* Oddly, the CFA is not the same as the frame pointer, 3533 * which is why the offset of BP is set to 8 3534 */ 3535 dwarf_CFA_set_reg_offset(BP, off); // CFA is now 0[EBP] 3536 cfa_offset = off; // remember the difference between the CFA and the frame pointer 3537 } 3538 enter = false; /* do not use ENTER instruction */ 3539 } 3540 else 3541 enter = true; 3542 } 3543 3544 /********************************************** 3545 * Enforce stack alignment. 3546 * Input: 3547 * cdb code builder. 3548 * Returns: 3549 * generated code 3550 */ 3551 void prolog_stackalign(ref CodeBuilder cdb) 3552 { 3553 if (!enforcealign) 3554 return; 3555 3556 const offset = (hasframe ? 2 : 1) * REGSIZE; // 1 for the return address + 1 for the PUSH EBP 3557 if (offset & (STACKALIGN - 1) || TARGET_STACKALIGN < STACKALIGN) 3558 cod3_stackalign(cdb, STACKALIGN); 3559 } 3560 3561 void prolog_frameadj(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool enter, bool* pushalloc) 3562 { 3563 uint pushallocreg = (tyf == TYmfunc) ? CX : AX; 3564 static if (TARGET_LINUX) 3565 { 3566 bool check = false; // seems that Linux doesn't need to fault in stack pages 3567 } 3568 else 3569 { 3570 bool check = (config.flags & CFGstack && !(I32 && xlocalsize < 0x1000)) // if stack overflow check 3571 || (TARGET_WINDOS && xlocalsize >= 0x1000 && config.exe & EX_flat); 3572 } 3573 if (check) 3574 { 3575 if (I16) 3576 { 3577 // BUG: Won't work if parameter is passed in AX 3578 movregconst(cdb,AX,xlocalsize,false); // MOV AX,localsize 3579 makeitextern(getRtlsym(RTLSYM_CHKSTK)); 3580 // CALL _chkstk 3581 cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM_CHKSTK)); 3582 useregs((ALLREGS | mBP | mES) & ~getRtlsym(RTLSYM_CHKSTK).Sregsaved); 3583 } 3584 else 3585 { 3586 /* Watch out for 64 bit code where EDX is passed as a register parameter 3587 */ 3588 reg_t reg = I64 ? 
R11 : DX; // scratch register 3589 3590 /* MOV EDX, xlocalsize/0x1000 3591 * L1: SUB ESP, 0x1000 3592 * TEST [ESP],ESP 3593 * DEC EDX 3594 * JNE L1 3595 * SUB ESP, xlocalsize % 0x1000 3596 */ 3597 movregconst(cdb, reg, xlocalsize / 0x1000, false); 3598 cod3_stackadj(cdb, 0x1000); 3599 code_orflag(cdb.last(), CFtarg2); 3600 cdb.gen2sib(0x85, modregrm(0,SP,4),modregrm(0,4,SP)); 3601 if (I64) 3602 { cdb.gen2(0xFF, modregrmx(3,1,R11)); // DEC R11D 3603 cdb.genc2(JNE,0,cast(targ_uns)-15); 3604 } 3605 else 3606 { cdb.gen1(0x48 + DX); // DEC EDX 3607 cdb.genc2(JNE,0,cast(targ_uns)-12); 3608 } 3609 regimmed_set(reg,0); // reg is now 0 3610 cod3_stackadj(cdb, xlocalsize & 0xFFF); 3611 useregs(mask(reg)); 3612 } 3613 } 3614 else 3615 { 3616 if (enter) 3617 { // ENTER xlocalsize,0 3618 cdb.genc(ENTER,0,FLconst,xlocalsize,FLconst,cast(targ_uns) 0); 3619 assert(!(config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D)); // didn't emit Dwarf data 3620 } 3621 else if (xlocalsize == REGSIZE && config.flags4 & CFG4optimized) 3622 { 3623 cdb. gen1(0x50 + pushallocreg); // PUSH AX 3624 // Do this to prevent an -x[EBP] to be moved in 3625 // front of the push. 3626 code_orflag(cdb.last(),CFvolatile); 3627 *pushalloc = true; 3628 } 3629 else 3630 cod3_stackadj(cdb, xlocalsize); 3631 } 3632 } 3633 3634 void prolog_frameadj2(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool* pushalloc) 3635 { 3636 uint pushallocreg = (tyf == TYmfunc) ? 
                                           CX : AX;
    if (xlocalsize == REGSIZE)
    {
        cdb.gen1(0x50 + pushallocreg);      // PUSH AX
        *pushalloc = true;
    }
    else if (xlocalsize == 2 * REGSIZE)
    {
        cdb.gen1(0x50 + pushallocreg);      // PUSH AX
        cdb.gen1(0x50 + pushallocreg);      // PUSH AX
        *pushalloc = true;
    }
    else
        cod3_stackadj(cdb, xlocalsize);
}

/**********************************************
 * Store the magic alloca() bookkeeping value into the frame slot
 * reserved for it (Alloca.offset), so alloca() can find the
 * local-variable area at runtime.
 */
void prolog_setupalloca(ref CodeBuilder cdb)
{
    //printf("prolog_setupalloca() offset x%x size x%x alignment x%x\n",
    //    cast(int)Alloca.offset, cast(int)Alloca.size, cast(int)Alloca.alignment);
    // Set up magic parameter for alloca()
    // MOV -REGSIZE[BP],localsize - BPoff
    cdb.genc(0xC7,modregrm(2,0,BPRM),
        FLconst,Alloca.offset + BPoff,
        FLconst,localsize - BPoff);
    if (I64)
        code_orrex(cdb.last(), REX_W);
}

/**************************************
 * Save registers that the function destroys,
 * but that the ABI says should be preserved across
 * function calls.
 *
 * Emit Dwarf info for these saves.
 * Params:
 *      cdb = append generated instructions to this
 *      topush = mask of registers to push
 *      cfa_offset = offset of frame pointer from CFA
 */

void prolog_saveregs(ref CodeBuilder cdb, regm_t topush, int cfa_offset)
{
    if (pushoffuse)
    {
        // Save to preallocated section in the stack frame
        int xmmtopush = numbitsset(topush & XMMREGS);   // XMM regs take 16 bytes
        int gptopush = numbitsset(topush) - xmmtopush;  // general purpose registers to save
        targ_size_t xmmoffset = pushoff + BPoff;
        if (!hasframe || enforcealign)
            xmmoffset += EBPtoESP;                      // no frame pointer: address relative to ESP
        targ_size_t gpoffset = xmmoffset + xmmtopush * 16;  // GP saves follow the XMM save area
        while (topush)
        {
            reg_t reg = findreg(topush);
            topush &= ~mask(reg);
            if (isXMMreg(reg))
            {
                if (hasframe && !enforcealign)
                {
                    // MOVUPD xmmoffset[EBP],xmm
                    cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset);
                }
                else
                {
                    // MOVUPD xmmoffset[ESP],xmm
                    cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset);
                }
                xmmoffset += 16;
            }
            else
            {
                if (hasframe && !enforcealign)
                {
                    // MOV gpoffset[EBP],reg
                    cdb.genc1(0x89,modregxrm(2,reg,BPRM),FLconst,gpoffset);
                }
                else
                {
                    // MOV gpoffset[ESP],reg
                    cdb.genc1(0x89,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset);
                }
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
                    config.ehmethod == EHmethod.EH_DWARF)
                {   // Emit debug_frame data giving location of saved register
                    code *c = cdb.finish();
                    pinholeopt(c, null);
                    dwarf_CFA_set_loc(calcblksize(c));  // address after save
                    dwarf_CFA_offset(reg, cast(int)(gpoffset - cfa_offset));
                    cdb.reset();
                    cdb.append(c);
                }
                gpoffset += REGSIZE;
            }
        }
    }
    else
    {
        while (topush)                      /* while registers to push      */
        {
            reg_t reg = findreg(topush);
            topush &= ~mask(reg);
            if (isXMMreg(reg))
            {
                // XMM registers can't be PUSHed; make room then store
                // SUB RSP,16
                cod3_stackadj(cdb, 16);
                // MOVUPD 0[RSP],xmm
                cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
                EBPtoESP += 16;
                spoff += 16;
            }
            else
            {
                genpush(cdb, reg);
                EBPtoESP += REGSIZE;
                spoff += REGSIZE;
                if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D ||
                    config.ehmethod == EHmethod.EH_DWARF)
                {   // Emit debug_frame data giving location of saved register
                    // relative to 0[EBP]
                    code *c = cdb.finish();
                    pinholeopt(c, null);
                    dwarf_CFA_set_loc(calcblksize(c));  // address after PUSH reg
                    dwarf_CFA_offset(reg, -EBPtoESP - cfa_offset);
                    cdb.reset();
                    cdb.append(c);
                }
            }
        }
    }
}

/**************************************
 * Undo prolog_saveregs()
 */

private void epilog_restoreregs(ref CodeBuilder cdb, regm_t topop)
{
    debug
    if (topop & ~(XMMREGS | 0xFFFF))
        printf("fregsaved = %s, mfuncreg = %s\n",regm_str(fregsaved),regm_str(mfuncreg));

    assert(!(topop & ~(XMMREGS | 0xFFFF)));
    if (pushoffuse)
    {
        // Restore from the preallocated section in the stack frame
        int xmmtopop = numbitsset(topop & XMMREGS);   // XMM regs take 16 bytes
        int gptopop = numbitsset(topop) - xmmtopop;   // general purpose registers to restore
        targ_size_t xmmoffset = pushoff + BPoff;
        if (!hasframe || enforcealign)
            xmmoffset += EBPtoESP;
        targ_size_t gpoffset = xmmoffset + xmmtopop * 16;
        while (topop)
        {
            reg_t reg = findreg(topop);
            topop &= ~mask(reg);
            if (isXMMreg(reg))
            {
                if (hasframe && !enforcealign)
                {
                    // MOVUPD xmm,xmmoffset[EBP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset);
                }
                else
                {
                    // MOVUPD xmm,xmmoffset[ESP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset);
                }
                xmmoffset += 16;
            }
            else
            {
                if (hasframe && !enforcealign)
                {
                    // MOV reg,gpoffset[EBP]
                    cdb.genc1(0x8B,modregxrm(2,reg,BPRM),FLconst,gpoffset);
                }
                else
                {
                    // MOV reg,gpoffset[ESP]
                    cdb.genc1(0x8B,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset);
                }
                if (I64)
                    code_orrex(cdb.last(), REX_W);
                gpoffset += REGSIZE;
            }
        }
    }
    else
    {
        // Registers were PUSHed; POP them in reverse order by scanning
        // downward from the highest register that can be in the mask.
        reg_t reg = I64 ? XMM7 : DI;
        if (!(topop & XMMREGS))
            reg = R15;                          // no XMM saves; start at highest GP reg
        regm_t regm = 1 << reg;

        while (topop)
        {   if (topop & regm)
            {
                if (isXMMreg(reg))
                {
                    // MOVUPD xmm,0[RSP]
                    cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0);
                    // ADD RSP,16
                    cod3_stackadj(cdb, -16);
                }
                else
                {
                    cdb.gen1(0x58 + (reg & 7));         // POP reg
                    if (reg & 8)
                        code_orrex(cdb.last(), REX_B);  // REX.B selects R8..R15
                }
                topop &= ~regm;
            }
            regm >>= 1;
            reg--;
        }
    }
}

version (SCPP)
{
/**************************************
 * Generate the profiler trace prolog: call the runtime trace entry
 * point and append the function's name so the profiler can identify it.
 * Params:
 *      farfunc = true for a far function (selects the far trace helper)
 *      regsaved = set to the mask of registers the trace helper preserves
 */
void prolog_trace(ref CodeBuilder cdb, bool farfunc, uint* regsaved)
{
    Symbol *s = getRtlsym(farfunc ? RTLSYM_TRACE_PRO_F : RTLSYM_TRACE_PRO_N);
    makeitextern(s);
    cdb.gencs(I16 ? 0x9A : CALL,0,FLfunc,s);      // CALL _trace
    if (!I16)
        code_orflag(cdb.last(),CFoff | CFselfrel);
    /* Embedding the function name inline after the call works, but it
     * makes disassembling the code annoying.
     */
    static if (ELFOBJ || MACHOBJ)
    {
        // Generate length prefixed name that is recognized by profiler
        size_t len = strlen(funcsym_p.Sident);
        char *buffer = cast(char *)malloc(len + 4);
        assert(buffer);
        if (len <= 254)
        {
            // short form: single length byte then the name
            buffer[0] = len;
            memcpy(buffer + 1, funcsym_p.Sident, len);
            len++;
        }
        else
        {
            // long form: 0xFF 0x00 marker then 16-bit little-endian length
            buffer[0] = 0xFF;
            buffer[1] = 0;
            buffer[2] = len & 0xFF;
            buffer[3] = len >> 8;
            memcpy(buffer + 4, funcsym_p.Sident, len);
            len += 4;
        }
        cdb.genasm(buffer, len);         // append func name
        free(buffer);
    }
    else
    {
        char [IDMAX+IDOHD+1] name = void;
        size_t len = objmod.mangle(funcsym_p,name.ptr);
        assert(len < name.length);
        cdb.genasm(name.ptr,len);        // append func name
    }
    *regsaved = s.Sregsaved;
}
}

/******************************
 * Generate special varargs prolog for Posix 64 bit systems.
 * Params:
 *      cdb = sink for generated code
 *      sv = symbol for __va_argsave
 *      namedargs = registers that named parameters (not ... arguments) were passed in.
 */
void prolog_genvarargs(ref CodeBuilder cdb, Symbol* sv, regm_t namedargs)
{
    /* Generate code to move any arguments passed in registers into
     * the stack variable __va_argsave,
     * so we can reference it via pointers through va_arg().
     *   struct __va_argsave_t {
     *     size_t[6] regs;
     *     real[8] fpregs;
     *     uint offset_regs;
     *     uint offset_fpregs;
     *     void* stack_args;
     *     void* reg_args;
     *   }
     * The MOVAPS instructions seg fault if data is not aligned on
     * 16 bytes, so this gives us a nice check to ensure no mistakes.
        MOV     voff+0*8[RBP],EDI
        MOV     voff+1*8[RBP],ESI
        MOV     voff+2*8[RBP],RDX
        MOV     voff+3*8[RBP],RCX
        MOV     voff+4*8[RBP],R8
        MOV     voff+5*8[RBP],R9
        MOVZX   EAX,AL                      // AL = 0..8, # of XMM registers used
        SHL     EAX,2                       // 4 bytes for each MOVAPS
        LEA     R11,offset L2[RIP]
        SUB     R11,RAX
        LEA     RAX,voff+6*8+0x7F[RBP]
        JMP     R11d
        MOVAPS  -0x0F[RAX],XMM7             // only save XMM registers if actually used
        MOVAPS  -0x1F[RAX],XMM6
        MOVAPS  -0x2F[RAX],XMM5
        MOVAPS  -0x3F[RAX],XMM4
        MOVAPS  -0x4F[RAX],XMM3
        MOVAPS  -0x5F[RAX],XMM2
        MOVAPS  -0x6F[RAX],XMM1
        MOVAPS  -0x7F[RAX],XMM0
      L2:
        MOV     1[RAX],offset_regs          // set __va_argsave.offset_regs
        MOV     5[RAX],offset_fpregs        // set __va_argsave.offset_fpregs
        LEA     R11, Para.size+Para.offset[RBP]
        MOV     9[RAX],R11                  // set __va_argsave.stack_args
        SUB     RAX,6*8+0x7F                // point to start of __va_argsave
        MOV     6*8+8*16+4+4+8[RAX],RAX     // set __va_argsave.reg_args
     * RAX and R11 are destroyed.
     */

    /* Save registers into the voff area on the stack
     */
    targ_size_t voff = Auto.size + BPoff + sv.Soffset;  // EBP offset of start of sv
    const int vregnum = 6;
    const uint vsize = vregnum * 8 + 8 * 16;

    static immutable ubyte[vregnum] regs = [ DI,SI,DX,CX,R8,R9 ];

    if (!hasframe || enforcealign)
        voff += EBPtoESP;

    for (int i = 0; i < vregnum; i++)
    {
        uint r = regs[i];
        if (!(mask(r) & namedargs))  // unnamed arguments would be the ... ones
        {
            uint ea = (REX_W << 16) | modregxrm(2,r,BPRM);
            if (!hasframe || enforcealign)
                ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,r,4);
            cdb.genc1(0x89,ea,FLconst,voff + i*8);
        }
    }

    genregs(cdb,MOVZXb,AX,AX);                 // MOVZX EAX,AL
    cdb.genc2(0xC1,modregrm(3,4,AX),2);        // SHL EAX,2
    int raxoff = cast(int)(voff+6*8+0x7F);
    // distance from end of the LEA R11 to label L2 depends on whether the
    // following LEA RAX uses a disp8 or disp32 encoding
    uint L2offset = (raxoff < -0x7F) ? 0x2D : 0x2A;
    if (!hasframe || enforcealign)
        L2offset += 1;                         // +1 for sib byte
    // LEA R11,offset L2[RIP]
    cdb.genc1(LEA,(REX_W << 16) | modregxrm(0,R11,5),FLconst,L2offset);
    genregs(cdb,0x29,AX,R11);                  // SUB R11,RAX
    code_orrex(cdb.last(), REX_W);
    // LEA RAX,voff+vsize-6*8-16+0x7F[RBP]
    uint ea = (REX_W << 16) | modregrm(2,AX,BPRM);
    if (!hasframe || enforcealign)
        // add sib byte for [RSP] addressing
        ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,AX,4);
    cdb.genc1(LEA,ea,FLconst,raxoff);
    cdb.gen2(0xFF,modregrmx(3,4,R11));         // JMP R11d
    for (int i = 0; i < 8; i++)
    {
        // MOVAPS -15-16*i[RAX],XMM7-i
        cdb.genc1(0x0F29,modregrm(0,XMM7-i,0),FLconst,-15-16*i);
    }

    /* Compute offset_regs and offset_fpregs
     */
    uint offset_regs = 0;
    uint offset_fpregs = vregnum * 8;
    for (int i = AX; i <= XMM7; i++)
    {
        regm_t m = mask(i);
        if (m & namedargs)
        {
            if (m & (mDI|mSI|mDX|mCX|mR8|mR9))
                offset_regs += 8;
            else if (m & XMMREGS)
                offset_fpregs += 16;
            namedargs &= ~m;
            if (!namedargs)
                break;
        }
    }
    // MOV 1[RAX],offset_regs
    cdb.genc(0xC7,modregrm(2,0,AX),FLconst,1,FLconst,offset_regs);

    // MOV 5[RAX],offset_fpregs
    cdb.genc(0xC7,modregrm(2,0,AX),FLconst,5,FLconst,offset_fpregs);

    // LEA R11, Para.size+Para.offset[RBP]
    ea = modregxrm(2,R11,BPRM);
    if (!hasframe)
        // NOTE(review): reg field here is DX, not R11, and modregrm (not
        // modregxrm) drops the REX.R needed for R11 — looks inconsistent
        // with the [RBP] form above; confirm against the !hasframe path.
        ea = (modregrm(0,4,SP) << 8) | modregrm(2,DX,4);
    Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1);
    cdb.genc1(LEA,(REX_W << 16) | ea,FLconst,Para.size + Para.offset);

    // MOV 9[RAX],R11
    cdb.genc1(0x89,(REX_W << 16) | modregxrm(2,R11,AX),FLconst,9);

    // SUB RAX,6*8+0x7F             // point to start of __va_argsave
    cdb.genc2(0x2D,0,6*8+0x7F);
    code_orrex(cdb.last(), REX_W);

    // MOV 6*8+8*16+4+4+8[RAX],RAX  // set __va_argsave.reg_args
    cdb.genc1(0x89,(REX_W <<
              16) | modregrm(2,AX,AX),FLconst,6*8+8*16+4+4+8);

    pinholeopt(cdb.peek(), null);
    useregs(mAX|mR11);
}

void prolog_gen_win64_varargs(ref CodeBuilder cdb)
{
    /* The Microsoft scheme.
     * http://msdn.microsoft.com/en-US/library/dd2wa36c(v=vs.80)
     * Copy registers onto stack.
         mov     8[RSP],RCX
         mov     010h[RSP],RDX
         mov     018h[RSP],R8
         mov     020h[RSP],R9
     */
}

/************************************
 * Params:
 *      cdb = generated code sink
 *      tf = what's the type of the function
 *      pushalloc = use PUSH to allocate on the stack rather than subtracting from SP
 *      namedargs = set to the registers that named parameters were passed in
 */
void prolog_loadparams(ref CodeBuilder cdb, tym_t tyf, bool pushalloc, out regm_t namedargs)
{
    //printf("prolog_loadparams()\n");
    debug
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        if (debugr && (s.Sclass == SCfastpar || s.Sclass == SCshadowreg))
        {
            printf("symbol '%s' is fastpar in register [l %s, m %s]\n", s.Sident.ptr,
                regm_str(mask(s.Spreg)),
                (s.Spreg2 == NOREG ? "NOREG" : regm_str(mask(s.Spreg2))));
            if (s.Sfl == FLreg)
                printf("\tassigned to register %s\n", regm_str(mask(s.Sreglsw)));
        }
    }

    uint pushallocreg = (tyf == TYmfunc) ? CX : AX;

    /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were not assigned
     * registers into their stack locations.
     */
    regm_t shadowregm = 0;
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        if ((s.Sclass == SCfastpar || s.Sclass == SCshadowreg) && s.Sfl != FLreg)
        {   // Argument is passed in a register

            type *t = s.Stype;
            type *t2 = null;

            tym_t tyb = tybasic(t.Tty);

            // This logic is same as FuncParamRegs_alloc function at src/dmd/backend/cod1.d
            //
            // Find suitable SROA based on the element type
            // (Don't put volatile parameters in registers)
            if (tyb == TYarray && !(t.Tty & mTYvolatile))
            {
                type *targ1;
                argtypes(t, targ1, t2);
                if (targ1)
                    t = targ1;
            }

            // If struct just wraps another type
            if (tyb == TYstruct)
            {
                // On windows 64 bits, structs occupy a general purpose register,
                // regardless of the struct size or the number & types of its fields.
                if (config.exe != EX_WIN64)
                {
                    type *targ1 = t.Ttag.Sstruct.Sarg1type;
                    t2 = t.Ttag.Sstruct.Sarg2type;
                    if (targ1)
                        t = targ1;
                }
            }

            if (Symbol_Sisdead(s, anyiasm))
            {
                // Ignore it, as it is never referenced
            }
            else
            {
                targ_size_t offset = Fast.size + BPoff;
                if (s.Sclass == SCshadowreg)
                    offset = Para.size;     // shadow regs spill into the caller's home area
                offset += s.Soffset;
                if (!hasframe || (enforcealign && s.Sclass != SCshadowreg))
                    offset += EBPtoESP;

                reg_t preg = s.Spreg;
                foreach (i; 0 .. 2)     // twice, once for each possible parameter register
                {
                    shadowregm |= mask(preg);
                    opcode_t op = 0x89;                 // MOV x[EBP],preg
                    if (isXMMreg(preg))
                        op = xmmstore(t.Tty);
                    if (!(pushalloc && preg == pushallocreg) || s.Sclass == SCshadowreg)
                    {
                        if (hasframe && (!enforcealign || s.Sclass == SCshadowreg))
                        {
                            // MOV x[EBP],preg
                            cdb.genc1(op,modregxrm(2,preg,BPRM),FLconst,offset);
                            if (isXMMreg(preg))
                            {
                                checkSetVex(cdb.last(), t.Tty);
                            }
                            else
                            {
                                //printf("%s Fast.size = %d, BPoff = %d, Soffset = %d, sz = %d\n",
                                //         s.Sident, (int)Fast.size, (int)BPoff, (int)s.Soffset, (int)sz);
                                if (I64 && sz > 4)
                                    code_orrex(cdb.last(), REX_W);
                            }
                        }
                        else
                        {
                            // MOV offset[ESP],preg
                            // BUG: byte size?
                            cdb.genc1(op,
                                (modregrm(0,4,SP) << 8) |
                                     modregxrm(2,preg,4),FLconst,offset);
                            if (isXMMreg(preg))
                            {
                                checkSetVex(cdb.last(), t.Tty);
                            }
                            else
                            {
                                if (I64 && sz > 4)
                                    cdb.last().Irex |= REX_W;
                            }
                        }
                    }
                    preg = s.Spreg2;
                    if (preg == NOREG)
                        break;
                    if (t2)
                        t = t2;
                    offset += REGSIZE;
                }
            }
        }
    }

    if (config.exe == EX_WIN64 && variadic(funcsym_p.Stype))
    {
        /* The Microsoft scheme.
         * http://msdn.microsoft.com/en-US/library/dd2wa36c(v=vs.80)
         * Copy registers onto stack.
             mov     8[RSP],RCX or XMM0
             mov     010h[RSP],RDX or XMM1
             mov     018h[RSP],R8 or XMM2
             mov     020h[RSP],R9 or XMM3
         */
        static immutable reg_t[4] vregs = [ CX,DX,R8,R9 ];
        for (int i = 0; i < vregs.length; ++i)
        {
            uint preg = vregs[i];
            uint offset = cast(uint)(Para.size + i * REGSIZE);
            if (!(shadowregm & (mask(preg) | mask(XMM0 + i))))
            {
                if (hasframe)
                {
                    // MOV x[EBP],preg
                    cdb.genc1(0x89,
                        modregxrm(2,preg,BPRM),FLconst, offset);
                    // NOTE(review): REX_W is or'd in again unconditionally below;
                    // this call appears redundant — confirm before removing.
                    code_orrex(cdb.last(), REX_W);
                }
                else
                {
                    // MOV offset[ESP],preg
                    cdb.genc1(0x89,
                        (modregrm(0,4,SP) << 8) |
                        modregxrm(2,preg,4),FLconst,offset + EBPtoESP);
                }
                cdb.last().Irex |= REX_W;
            }
        }
    }

    /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were assigned registers
     * into their assigned registers.
     * Note that we have a big problem if Pa is passed in R1 and assigned to R2,
     * and Pb is passed in R2 but assigned to R1. Detect it and assert.
     */
    regm_t assignregs = 0;
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)
            namedargs |= s.Spregm();

        if ((s.Sclass == SCfastpar || s.Sclass == SCshadowreg) && s.Sfl == FLreg)
        {   // Argument is passed in a register

            type *t = s.Stype;
            type *t2 = null;
            if (tybasic(t.Tty) == TYstruct && config.exe != EX_WIN64)
            {   type *targ1 = t.Ttag.Sstruct.Sarg1type;
                t2 = t.Ttag.Sstruct.Sarg2type;
                if (targ1)
                    t = targ1;
            }

            reg_t preg = s.Spreg;
            reg_t r = s.Sreglsw;
            for (int i = 0; i < 2; ++i)
            {
                if (preg == NOREG)
                    break;
                assert(!(mask(preg) & assignregs));     // not already stepped on
                assignregs |= mask(r);

                // MOV reg,preg
                if (r == preg)
                {
                }
                else if (mask(preg) & XMMREGS)
                {
                    const op = xmmload(t.Tty);      // MOVSS/D xreg,preg
                    uint xreg = r - XMM0;
                    cdb.gen2(op,modregxrmx(3,xreg,preg - XMM0));
                }
                else
                {
                    //printf("test1 mov %s, %s\n", regstring[r], regstring[preg]);
                    genmovreg(cdb,r,preg);
                    if (I64 && sz == 8)
                        code_orrex(cdb.last(), REX_W);
                }
                preg = s.Spreg2;
                r = s.Sregmsw;
                if (t2)
                    t = t2;
            }
        }
    }

    /* For parameters that were passed on the stack, but are enregistered,
     * initialize the registers with the parameter stack values.
     * Do not use assignaddr(), as it will replace the stack reference with
     * the register.
     */
    for (SYMIDX si = 0; si < globsym.length; si++)
    {
        Symbol *s = globsym[si];
        uint sz = cast(uint)type_size(s.Stype);

        if ((s.Sclass == SCregpar || s.Sclass == SCparameter) &&
            s.Sfl == FLreg &&
            (refparam
                // This variable has been reference by a nested function
                || MARS && s.Stype.Tty & mTYvolatile
                ))
        {
            // MOV reg,param[BP]
            //assert(refparam);
            if (mask(s.Sreglsw) & XMMREGS)
            {
                const op = xmmload(s.Stype.Tty);  // MOVSS/D xreg,mem
                uint xreg = s.Sreglsw - XMM0;
                cdb.genc1(op,modregxrm(2,xreg,BPRM),FLconst,Para.size + s.Soffset);
                if (!hasframe)
                {   // Convert to ESP relative address rather than EBP
                    code *c = cdb.last();
                    c.Irm = cast(ubyte)modregxrm(2,xreg,4);
                    c.Isib = modregrm(0,4,SP);
                    c.IEV1.Vpointer += EBPtoESP;
                }
            }
            else
            {
                cdb.genc1(sz == 1 ? 0x8A : 0x8B,
                    modregxrm(2,s.Sreglsw,BPRM),FLconst,Para.size + s.Soffset);
                code *c = cdb.last();
                if (!I16 && sz == SHORTSIZE)
                    c.Iflags |= CFopsize; // operand size
                if (I64 && sz >= REGSIZE)
                    c.Irex |= REX_W;
                if (I64 && sz == 1 && s.Sreglsw >= 4)
                    c.Irex |= REX;      // access SPL/BPL/SIL/DIL instead of AH..BH
                if (!hasframe)
                {   // Convert to ESP relative address rather than EBP
                    assert(!I16);
                    c.Irm = cast(ubyte)modregxrm(2,s.Sreglsw,4);
                    c.Isib = modregrm(0,4,SP);
                    c.IEV1.Vpointer += EBPtoESP;
                }
                if (sz > REGSIZE)
                {
                    // load the most significant half into its register too
                    cdb.genc1(0x8B,
                        modregxrm(2,s.Sregmsw,BPRM),FLconst,Para.size + s.Soffset + REGSIZE);
                    code *cx = cdb.last();
                    if (I64)
                        cx.Irex |= REX_W;
                    if (!hasframe)
                    {   // Convert to ESP relative address rather than EBP
                        assert(!I16);
                        cx.Irm = cast(ubyte)modregxrm(2,s.Sregmsw,4);
                        cx.Isib = modregrm(0,4,SP);
                        cx.IEV1.Vpointer += EBPtoESP;
                    }
                }
            }
        }
    }
}

/*******************************
 * Generate and return function epilog.
 * Output:
 *      retsize         Size of function epilog
 */

void epilog(block *b)
{
    code *cpopds;
    reg_t reg;
    reg_t regx;                         // register that's not a return reg
    regm_t topop,regm;
    targ_size_t xlocalsize = localsize;

    CodeBuilder cdbx; cdbx.ctor();
    tym_t tyf = funcsym_p.ty();
    tym_t tym = tybasic(tyf);
    bool farfunc = tyfarfunc(tym) != 0;
    if (!(b.Bflags & BFLepilog))        // if no epilog code
        goto Lret;                      // just generate RET
    regx = (b.BC == BCret) ? AX : CX;

    retsize = 0;

    if (tyf & mTYnaked)                 // if no prolog/epilog
        return;

    if (tym == TYifunc)
    {
        // interrupt function: restore segment/general registers then IRET
        static immutable ubyte[5] ops2 = [ 0x07,0x1F,0x61,0xCF,0 ];
        static immutable ubyte[12] ops0 = [ 0x07,0x1F,0x5F,0x5E,
                                            0x5D,0x5B,0x5B,0x5A,
                                            0x59,0x58,0xCF,0 ];

        genregs(cdbx,0x8B,SP,BP);              // MOV SP,BP
        auto p = (config.target_cpu >= TARGET_80286) ? ops2.ptr : ops0.ptr;
        do
            cdbx.gen1(*p);
        while (*++p);
        goto Lopt;
    }

    if (config.flags & CFGtrace &&
        (!(config.flags4 & CFG4allcomdat) ||
         funcsym_p.Sclass == SCcomdat ||
         funcsym_p.Sclass == SCglobal ||
         (config.flags2 & CFG2comdat && SymInline(funcsym_p))
        )
       )
    {
        Symbol *s = getRtlsym(farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N);
        makeitextern(s);
        cdbx.gencs(I16 ? 0x9A : CALL,0,FLfunc,s);      // CALLF _trace
        if (!I16)
            code_orflag(cdbx.last(),CFoff | CFselfrel);
        useregs((ALLREGS | mBP | mES) & ~s.Sregsaved);
    }

    if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.exe == EX_WIN32 || MARS))
    {
        nteh_epilog(cdbx);
    }

    cpopds = null;
    if (tyf & mTYloadds)
    {
        cdbx.gen1(0x1F);                // POP DS
        cpopds = cdbx.last();
    }

    /* Pop all the general purpose registers saved on the stack
     * by the prolog code. Remember to do them in the reverse
     * order they were pushed.
     */
    topop = fregsaved & ~mfuncreg;
    epilog_restoreregs(cdbx, topop);

    version (MARS)
    {
        if (usednteh & NTEHjmonitor)
        {
            regm_t retregs = 0;
            if (b.BC == BCretexp)
                retregs = regmask(b.Belem.Ety, tym);
            nteh_monitor_epilog(cdbx,retregs);
            xlocalsize += 8;
        }
    }

    if (config.wflags & WFwindows && farfunc)
    {
        int wflags = config.wflags;
        if (wflags & WFreduced && !(tyf & mTYexport))
        {   // reduced prolog/epilog for non-exported functions
            wflags &= ~(WFdgroup | WFds | WFss);
            if (!(wflags & WFsaveds))
                goto L4;
        }

        if (localsize)
        {
            cdbx.genc1(LEA,modregrm(1,SP,6),FLconst,cast(targ_uns)-2); /* LEA SP,-2[BP] */
        }
        if (wflags & (WFsaveds | WFds | WFss | WFdgroup))
        {
            if (cpopds)
                cpopds.Iop = NOP;              // don't need previous one
            cdbx.gen1(0x1F);                   // POP DS
        }
        cdbx.gen1(0x58 + BP);                  // POP BP
        if (config.wflags & WFincbp)
            cdbx.gen1(0x48 + BP);              // DEC BP
        assert(hasframe);
    }
    else
    {
        if (needframe || (xlocalsize && hasframe))
        {
        L4:
            assert(hasframe);
            if (xlocalsize || enforcealign)
            {
                if (config.flags2 & CFG2stomp)
                {   /*   MOV  ECX,0xBEAF
                     * L1:
                     *   MOV  [ESP],ECX
                     *   ADD  ESP,4
                     *   CMP  EBP,ESP
                     *   JNE  L1
                     *   POP  EBP
                     */
                    /* Value should be:
                     * 1. != 0 (code checks for null pointers)
                     * 2. be odd (to mess up alignment)
                     * 3. fall in first 64K (likely marked as inaccessible)
                     * 4. be a value that stands out in the debugger
                     */
                    assert(I32 || I64);
                    targ_size_t value = 0x0000BEAF;
                    reg_t regcx = CX;
                    mfuncreg &= ~mask(regcx);
                    uint grex = I64 ? REX_W << 16 : 0;
                    cdbx.genc2(0xC7,grex | modregrmx(3,0,regcx),value);             // MOV regcx,value
                    cdbx.gen2sib(0x89,grex | modregrm(0,regcx,4),modregrm(0,4,SP)); // MOV [ESP],regcx
                    code *c1 = cdbx.last();
                    cdbx.genc2(0x81,grex | modregrm(3,0,SP),REGSIZE);               // ADD ESP,REGSIZE
                    genregs(cdbx,0x39,SP,BP);                                       // CMP EBP,ESP
                    if (I64)
                        code_orrex(cdbx.last(),REX_W);
                    genjmp(cdbx,JNE,FLcode,cast(block *)c1);                        // JNE L1
                    // explicitly mark as short jump, needed for correct retsize calculation (Bugzilla 15779)
                    cdbx.last().Iflags &= ~CFjmp16;
                    cdbx.gen1(0x58 + BP);                                           // POP BP
                }
                else if (config.exe == EX_WIN64)
                {   // See http://msdn.microsoft.com/en-us/library/tawsa7cb(v=vs.80).aspx
                    // LEA RSP,0[RBP]
                    cdbx.genc1(LEA,(REX_W<<16)|modregrm(2,SP,BPRM),FLconst,0);
                    cdbx.gen1(0x58 + BP);      // POP RBP
                }
                else if (config.target_cpu >= TARGET_80286 &&
                    !(config.target_cpu >= TARGET_80386 && config.flags4 & CFG4speed)
                   )
                    cdbx.gen1(LEAVE);          // LEAVE
                else if (0 && xlocalsize == REGSIZE && Alloca.size == 0 && I32)
                {   // This doesn't work - I should figure out why
                    mfuncreg &= ~mask(regx);
                    cdbx.gen1(0x58 + regx);    // POP regx
                    cdbx.gen1(0x58 + BP);      // POP BP
                }
                else
                {
                    genregs(cdbx,0x8B,SP,BP);  // MOV SP,BP
                    if (I64)
                        code_orrex(cdbx.last(), REX_W);   // MOV RSP,RBP
                    cdbx.gen1(0x58 + BP);      // POP BP
                }
            }
            else
                cdbx.gen1(0x58 + BP);          // POP BP
            if (config.wflags & WFincbp && farfunc)
                cdbx.gen1(0x48 + BP);          // DEC BP
        }
        else if (xlocalsize == REGSIZE && (!I16 || b.BC == BCret))
        {
            // deallocate the single-register frame with a POP into a scratch reg
            mfuncreg &= ~mask(regx);
            cdbx.gen1(0x58 + regx);            // POP regx
        }
        else if (xlocalsize)
            cod3_stackadj(cdbx, cast(int)-xlocalsize);
    }
    if (b.BC == BCret || b.BC == BCretexp)
    {
Lret:
        opcode_t op = tyfarfunc(tym) ? 0xCA : 0xC2;   // RETF imm16 : RET imm16
        if (tym == TYhfunc)
        {
            cdbx.genc2(0xC2,0,4);                     // RET 4
        }
        else if (!typfunc(tym) ||       // if caller cleans the stack
                 config.exe == EX_WIN64 ||
                 Para.offset == 0)      // or nothing pushed on the stack anyway
        {
            op++;                       // to a regular RET
            cdbx.gen1(op);
        }
        else
        {   // Stack is always aligned on register size boundary
            Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1);
            if (Para.offset >= 0x10000)
            {
                /* RET imm16 can't encode a cleanup >= 64K:
                    POP REG
                    ADD ESP, Para.offset
                    JMP REG
                */
                cdbx.gen1(0x58+regx);
                cdbx.genc2(0x81, modregrm(3,0,SP), Para.offset);
                if (I64)
                    code_orrex(cdbx.last(), REX_W);
                cdbx.genc2(0xFF, modregrm(3,4,regx), 0);
                if (I64)
                    code_orrex(cdbx.last(), REX_W);
            }
            else
                cdbx.genc2(op,0,Para.offset);          // RET Para.offset
        }
    }

Lopt:
    // If last instruction in ce is ADD SP,imm, and first instruction
    // in c sets SP, we can dump the ADD.
    CodeBuilder cdb; cdb.ctor();
    cdb.append(b.Bcode);
    code *cr = cdb.last();
    code *c = cdbx.peek();
    if (cr && c && !I64)
    {
        if (cr.Iop == 0x81 && cr.Irm == modregrm(3,0,SP))     // if ADD SP,imm
        {
            if (
                c.Iop == LEAVE ||                                // LEAVE
                (c.Iop == 0x8B && c.Irm == modregrm(3,SP,BP)) || // MOV SP,BP
                (c.Iop == LEA && c.Irm == modregrm(1,SP,6))      // LEA SP,-imm[BP]
               )
                cr.Iop = NOP;
            else if (c.Iop == 0x58 + BP)                         // if POP BP
            {
                cr.Iop = 0x8B;
                cr.Irm = modregrm(3,SP,BP);                      // MOV SP,BP
            }
        }
        else
        {
            static if (0)
            {
                // These optimizations don't work if the called function
                // cleans off the stack.
                if (c.Iop == 0xC3 && cr.Iop == CALL)       // CALL near
                {
                    cr.Iop = 0xE9;                         // JMP near
                    c.Iop = NOP;
                }
                else if (c.Iop == 0xCB && cr.Iop == 0x9A)  // CALL far
                {
                    cr.Iop = 0xEA;                         // JMP far
                    c.Iop = NOP;
                }
            }
        }
    }

    pinholeopt(c, null);
    retsize += calcblksize(c);          // compute size of function epilog
    cdb.append(cdbx);
    b.Bcode = cdb.finish();
}

/*******************************
 * Return offset of SP from BP.
 */

targ_size_t cod3_spoff()
{
    //printf("spoff = x%x, localsize = x%x\n", (int)spoff, (int)localsize);
    return spoff + localsize;
}

/*******************************
 * Generate code to move a spilled symbol between its stack home and
 * its assigned register(s).
 * Params:
 *      cdb = generated code sink
 *      s = symbol being spilled/reloaded
 *      toreg = true to load the register from memory, false to store it
 */
void gen_spill_reg(ref CodeBuilder cdb, Symbol* s, bool toreg)
{
    code cs;
    const regm_t keepmsk = toreg ? RMload : RMstore;

    elem* e = el_var(s); // so we can trick getlvalue() into working for us

    if (mask(s.Sreglsw) & XMMREGS)
    {   // Convert to save/restore of XMM register
        if (toreg)
            cs.Iop = xmmload(s.Stype.Tty);      // MOVSS/D xreg,mem
        else
            cs.Iop = xmmstore(s.Stype.Tty);     // MOVSS/D mem,xreg
        getlvalue(cdb,&cs,e,keepmsk);
        cs.orReg(s.Sreglsw - XMM0);
        cdb.gen(&cs);
    }
    else
    {
        const int sz = cast(int)type_size(s.Stype);
        cs.Iop = toreg ?
                         0x8B : 0x89;   // MOV reg,mem[ESP] : MOV mem[ESP],reg
        cs.Iop ^= (sz == 1);            // drop to the byte form of the opcode for 1-byte types
        getlvalue(cdb,&cs,e,keepmsk);
        cs.orReg(s.Sreglsw);
        if (I64 && sz == 1 && s.Sreglsw >= 4)
            cs.Irex |= REX;             // access SPL/BPL/SIL/DIL instead of AH..BH
        if ((cs.Irm & 0xC0) == 0xC0 &&                  // reg,reg
            (((cs.Irm >> 3) ^ cs.Irm) & 7) == 0 &&      // registers match
            (((cs.Irex >> 2) ^ cs.Irex) & 1) == 0)      // REX_R and REX_B match
        { }                                             // skip MOV reg,reg
        else
            cdb.gen(&cs);
        if (sz > REGSIZE)
        {
            cs.setReg(s.Sregmsw);
            getlvalue_msw(&cs);
            if ((cs.Irm & 0xC0) == 0xC0 &&              // reg,reg
                (((cs.Irm >> 3) ^ cs.Irm) & 7) == 0 &&  // registers match
                (((cs.Irex >> 2) ^ cs.Irex) & 1) == 0)  // REX_R and REX_B match
            { }                                         // skip MOV reg,reg
            else
                cdb.gen(&cs);
        }
    }

    el_free(e);
}

/****************************
 * Generate code for, and output a thunk.
 * Params:
 *      sthunk = Symbol of thunk
 *      sfunc = Symbol of thunk's target function
 *      thisty = Type of this pointer
 *      p = ESP parameter offset to this pointer
 *      d = offset to add to 'this' pointer
 *      d2 = offset from 'this' to vptr
 *      i = offset into vtbl[]
 */

void cod3_thunk(Symbol *sthunk,Symbol *sfunc,uint p,tym_t thisty,
        uint d,int i,uint d2)
{
    targ_size_t thunkoffset;

    int seg = sthunk.Sseg;
    cod3_align(seg);

    // Skip over return address
    tym_t thunkty = tybasic(sthunk.ty());
    if (tyfarfunc(thunkty))
        p += I32 ? 8 : tysize(TYfptr);          // far function
    else
        p += tysize(TYnptr);

    CodeBuilder cdb; cdb.ctor();
    if (!I16)
    {
        /*
           Generate:
            ADD p[ESP],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV EAX, p[ESP]                     EAX = this
            MOV EAX, d2[EAX]                    EAX = this.vptr
            JMP i[EAX]                          jump to virtual function
         */
        reg_t reg = 0;
        if (cast(int)d < 0)
        {
            d = -d;
            reg = 5;                            // switch from ADD to SUB
        }
        if (thunkty == TYmfunc)
        {                                       // ADD ECX,d ('this' is in ECX)
            if (d)
                cdb.genc2(0x81,modregrm(3,reg,CX),d);
        }
        else if (thunkty == TYjfunc || (I64 && thunkty == TYnfunc))
        {                                       // ADD EAX,d
            int rm = AX;
            if (config.exe == EX_WIN64)
                rm = CX;
            else if (I64)
                rm = DI;
            if (d)
                cdb.genc2(0x81,modregrm(3,reg,rm),d);
        }
        else
        {
            cdb.genc(0x81,modregrm(2,reg,4),
                FLconst,p,                      // to this
                FLconst,d);                     // ADD p[ESP],d
            cdb.last().Isib = modregrm(0,4,SP);
        }
        if (I64 && cdb.peek())
            cdb.last().Irex |= REX_W;
    }
    else
    {
        /*
           Generate:
            MOV BX,SP
            ADD [SS:] p[BX],d
           For direct call:
            JMP sfunc
           For virtual call:
            MOV BX, p[BX]                       BX = this
            MOV BX, d2[BX]                      BX = this.vptr
            JMP i[BX]                           jump to virtual function
         */

        genregs(cdb,0x89,SP,BX);                        // MOV BX,SP
        cdb.genc(0x81,modregrm(2,0,7),
            FLconst,p,                                  // to this
            FLconst,d);                                 // ADD p[BX],d
        if (config.wflags & WFssneds ||
            // If DS needs reloading from SS,
            // then assume SS != DS on thunk entry
            (LARGEDATA && config.wflags & WFss))
            cdb.last().Iflags |= CFss;                  // SS:
    }

    if ((i & 0xFFFF) != 0xFFFF)             // if virtual call
    {
        const bool FARTHIS = (tysize(thisty) > REGSIZE);
        const bool FARVPTR = FARTHIS;

        assert(thisty != TYvptr);           // can't handle this case

        if (!I16)
        {
            assert(!FARTHIS && !LARGECODE);
            if (thunkty == TYmfunc)         // if 'this' is in ECX
            {
                // MOV EAX,d2[ECX]
                cdb.genc1(0x8B,modregrm(2,AX,CX),FLconst,d2);
            }
            else if (thunkty == TYjfunc)    // if 'this' is in EAX
            {
                // MOV EAX,d2[EAX]
                cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2);
            }
            else
            {
                // MOV EAX,p[ESP]
                cdb.genc1(0x8B,(modregrm(0,4,SP) << 8) | modregrm(2,AX,4),FLconst,cast(targ_uns) p);
                if (I64)
                    cdb.last().Irex |= REX_W;

                // MOV EAX,d2[EAX]
                cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2);
            }
            if (I64)
                code_orrex(cdb.last(), REX_W);
            // JMP i[EAX]
            cdb.genc1(0xFF,modregrm(2,4,0),FLconst,cast(targ_uns) i);
        }
        else
        {
            // MOV/LES BX,[SS:] p[BX]
            cdb.genc1((FARTHIS ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,cast(targ_uns) p);
            if (config.wflags & WFssneds ||
                // If DS needs reloading from SS,
                // then assume SS != DS on thunk entry
                (LARGEDATA && config.wflags & WFss))
                cdb.last().Iflags |= CFss;              // SS:

            // MOV/LES BX,[ES:]d2[BX]
            cdb.genc1((FARVPTR ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,d2);
            if (FARTHIS)
                cdb.last().Iflags |= CFes;              // ES:

            // JMP i[BX]
            cdb.genc1(0xFF,modregrm(2,(LARGECODE ? 5 : 4),7),FLconst,cast(targ_uns) i);
            if (FARVPTR)
                cdb.last().Iflags |= CFes;              // ES:
        }
    }
    else
    {
        static if (0)
        {
            localgot = null;                // no local variables
            code *c1 = load_localgot();
            if (c1)
            {
                assignaddrc(c1);
                cdb.append(c1);
            }
        }
        cdb.gencs((LARGECODE ? 0xEA : 0xE9),0,FLfunc,sfunc); // JMP sfunc
        cdb.last().Iflags |= LARGECODE ? (CFseg | CFoff) : (CFselfrel | CFoff);
    }

    thunkoffset = Offset(seg);
    code *c = cdb.finish();
    pinholeopt(c,null);
    codout(seg,c);
    code_free(c);

    sthunk.Soffset = thunkoffset;
    sthunk.Ssize = Offset(seg) - thunkoffset; // size of thunk
    sthunk.Sseg = seg;
    static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
    {
        objmod.pubdef(seg,sthunk,sthunk.Soffset);
    }
    static if (TARGET_WINDOS)
    {
        if (config.objfmt == OBJ_MSCOFF)
            objmod.pubdef(seg,sthunk,sthunk.Soffset);
    }
    searchfixlist(sthunk);              // resolve forward refs
}

/*****************************
 * Assume symbol s is extern.
 */

void makeitextern(Symbol *s)
{
    if (s.Sxtrnnum == 0)
    {
        s.Sclass = SCextern;            /* external     */
        /*printf("makeitextern(x%x)\n",s);*/
        objmod.external(s);
    }
}


/*******************************
 * Replace JMPs in Bgotocode with JMP SHORTs whereever possible.
 * This routine depends on FLcode jumps to only be forward
 * referenced.
 * BFLjmpoptdone is set to true if nothing more can be done
 * with this block.
 * Input:
 *      flag    !=0 means don't have correct Boffsets yet
 * Returns:
 *      number of bytes saved
 */

int branch(block *bl,int flag)
{
    int bytesaved;
    code* c,cn,ct;
    targ_size_t offset,disp;
    targ_size_t csize;

    if (!flag)
        bl.Bflags |= BFLjmpoptdone;     // assume this will be all
    c = bl.Bcode;
    if (!c)
        return 0;
    bytesaved = 0;
    offset = bl.Boffset;                /* offset of start of block     */
    while (1)
    {
        ubyte op;

        csize = calccodsize(c);
        cn = code_next(c);
        op = cast(ubyte)c.Iop;
        // Candidates: Jcc (0x70..0x7F) still flagged 16-bit, or a JMP not pinned long.
        if ((op & ~0x0F) == 0x70 && c.Iflags & CFjmp16 ||
            (op == JMP && !(c.Iflags & CFjmp5)))
        {
          L1:
            switch (c.IFL2)
            {
                case FLblock:
                    if (flag)           // no offsets yet, don't optimize
                        goto L3;
                    disp = c.IEV2.Vblock.Boffset - offset - csize;

                    /* If this is a forward branch, and there is an aligned
                     * block intervening, it is possible that shrinking
                     * the jump instruction will cause it to be out of
                     * range of the target. This happens if the alignment
                     * prevents the target block from moving correspondingly
                     * closer.
                     */
                    if (disp >= 0x7F-4 && c.IEV2.Vblock.Boffset > offset)
                    {   /* Look for intervening alignment
                         */
                        for (block *b = bl.Bnext; b; b = b.Bnext)
                        {
                            if (b.Balign)
                            {
                                bl.Bflags &= ~BFLjmpoptdone;    // some JMPs left
                                goto L3;
                            }
                            if (b == c.IEV2.Vblock)
                                break;
                        }
                    }

                    break;

                case FLcode:
                {
                    code *cr;

                    disp = 0;

                    ct = c.IEV2.Vcode;          /* target of branch     */
                    assert(ct.Iflags & (CFtarg | CFtarg2));
                    // Sum instruction sizes between here and the target (forward).
                    for (cr = cn; cr; cr = code_next(cr))
                    {
                        if (cr == ct)
                            break;
                        disp += calccodsize(cr);
                    }

                    if (!cr)
                    {   // Didn't find it in forward search. Try backwards jump
                        int s = 0;
                        disp = 0;
                        for (cr = bl.Bcode; cr != cn; cr = code_next(cr))
                        {
                            assert(cr != null); // must have found it
                            if (cr == ct)
                                s = 1;
                            if (s)
                                disp += calccodsize(cr);
                        }
                    }

                    if (config.flags4 & CFG4optimized && !flag)
                    {
                        /* Propagate branch forward past junk   */
                        while (1)
                        {
                            if (ct.Iop == NOP ||
                                ct.Iop == (ESCAPE | ESClinnum))
                            {
                                ct = code_next(ct);
                                if (!ct)
                                    goto L2;
                            }
                            else
                            {
                                c.IEV2.Vcode = ct;
                                ct.Iflags |= CFtarg;
                                break;
                            }
                        }

                        /* And eliminate jmps to jmps   */
                        if ((op == ct.Iop || ct.Iop == JMP) &&
                            (op == JMP || c.Iflags & CFjmp16))
                        {
                            c.IFL2 = ct.IFL2;
                            c.IEV2.Vcode = ct.IEV2.Vcode;
                            /*printf("eliminating branch\n");*/
                            goto L1;    // re-evaluate with the new target
                        }
                     L2:
                        { }
                    }
                }
                    break;

                default:
                    goto L3;
            }

            if (disp == 0)                      // bra to next instruction
            {
                bytesaved += csize;
                c.Iop = NOP;                    // del branch instruction
                c.IEV2.Vcode = null;
                c = cn;
                if (!c)
                    break;
                continue;
            }
            // Displacement fits in a signed byte both before (disp) and after
            // the instruction shrinks by 2+ bytes (disp - 2)?
            else if (cast(targ_size_t)cast(targ_schar)(disp - 2) == (disp - 2) &&
                     cast(targ_size_t)cast(targ_schar)disp == disp)
            {
                if (op == JMP)
                {
                    c.Iop = JMPS;               // JMP SHORT
                    bytesaved += I16 ? 1 : 3;
                }
                else                            // else Jcond
                {
                    c.Iflags &= ~CFjmp16;       // a branch is ok
                    bytesaved += I16 ? 3 : 4;

                    // Replace a cond jump around a call to a function that
                    // never returns with a cond jump to that function.
                    if (config.flags4 & CFG4optimized &&
                        config.target_cpu >= TARGET_80386 &&
                        disp == (I16 ? 3 : 5) &&        // CALL instruction size
                        cn &&
                        cn.Iop == CALL &&
                        cn.IFL2 == FLfunc &&
                        cn.IEV2.Vsym.Sflags & SFLexit &&        // target never returns
                        !(cn.Iflags & (CFtarg | CFtarg2))
                       )
                    {
                        // Turn the CALL into the inverted conditional jump (0F 8x Jcc rel32).
                        cn.Iop = 0x0F00 | ((c.Iop & 0x0F) ^ 0x81);
                        c.Iop = NOP;
                        c.IEV2.Vcode = null;
                        bytesaved++;

                        // If nobody else points to ct, we can remove the CFtarg
                        if (flag && ct)
                        {
                            code *cx;
                            for (cx = bl.Bcode; 1; cx = code_next(cx))
                            {
                                if (!cx)
                                {
                                    ct.Iflags &= ~CFtarg;
                                    break;
                                }
                                if (cx.IEV2.Vcode == ct)
                                    break;
                            }
                        }
                    }
                }
                csize = calccodsize(c);
            }
            else
                bl.Bflags &= ~BFLjmpoptdone;    // some JMPs left
        }
L3:
        if (cn)
        {
            offset += csize;
            c = cn;
        }
        else
            break;
    }
    //printf("bytesaved = x%x\n",bytesaved);
    return bytesaved;
}


/************************************************
 * Adjust all Soffset's of stack variables so they
 * are all relative to the frame pointer.
 */

version (MARS)
{
void cod3_adjSymOffsets()
{
    SYMIDX si;

    //printf("cod3_adjSymOffsets()\n");
    for (si = 0; si < globsym.length; si++)
    {
        //printf("\tglobsym[%d] = %p\n",si,globsym[si]);
        Symbol *s = globsym[si];

        // Each storage class gets a different base added to its Soffset.
        switch (s.Sclass)
        {
            case SCparameter:
            case SCregpar:
            case SCshadowreg:
                //printf("s = '%s', Soffset = x%x, Para.size = x%x, EBPtoESP = x%x\n", s.Sident, s.Soffset, Para.size, EBPtoESP);
                s.Soffset += Para.size;
                if (0 && !(funcsym_p.Sfunc.Fflags3 & Fmember))  // deliberately disabled (if (0 && ...))
                {
                    if (!hasframe)
                        s.Soffset += EBPtoESP;
                    if (funcsym_p.Sfunc.Fflags3 & Fnested)
                        s.Soffset += REGSIZE;
                }
                break;

            case SCfastpar:
                //printf("\tfastpar %s %p Soffset %x Fast.size %x BPoff %x\n", s.Sident, s, (int)s.Soffset, (int)Fast.size, (int)BPoff);
                s.Soffset += Fast.size + BPoff;
                break;

            case SCauto:
            case SCregister:
                if (s.Sfl == FLfast)
                    s.Soffset += Fast.size + BPoff;
                else
                //printf("s = '%s', Soffset = x%x, Auto.size = x%x, BPoff = x%x EBPtoESP = x%x\n", s.Sident, (int)s.Soffset, (int)Auto.size, (int)BPoff, (int)EBPtoESP);
                //  if (!(funcsym_p.Sfunc.Fflags3 & Fnested))
                        s.Soffset += Auto.size + BPoff;
                break;

            case SCbprel:
                break;      // already BP-relative; nothing to adjust

            default:
                continue;
        }
        static if (0)
        {
            if (!hasframe)
                s.Soffset += EBPtoESP;
        }
    }
}

}

/*******************************
 * Take symbol info in union ev and replace it with a real address
 * in Vpointer.
 */

void assignaddr(block *bl)
{
    // Save/restore the frame-tracking globals so that code generated
    // outside the prolog (BFLoutsideprolog) is fixed up as if there
    // were no frame yet (only the return address is on the stack).
    int EBPtoESPsave = EBPtoESP;
    int hasframesave = hasframe;

    if (bl.Bflags & BFLoutsideprolog)
    {
        EBPtoESP = -REGSIZE;
        hasframe = 0;
    }
    assignaddrc(bl.Bcode);
    hasframe = hasframesave;
    EBPtoESP = EBPtoESPsave;
}

/********************************
 * Walk a code list and rewrite symbolic operands (IFL1/IFL2 + Vsym)
 * into concrete constants/segment offsets in Vpointer.
 * Also interprets ESCAPE pseudo-ops: ESCadjesp updates the global
 * EBPtoESP tracking, ESCfixesp/ESCframeptr are expanded into real
 * instructions.
 * NOTE(review): mutates the global EBPtoESP as it walks; callers that
 * need it preserved must save/restore (see assignaddr above).
 */

void assignaddrc(code *c)
{
    int sn;
    Symbol *s;
    ubyte ins,rm;
    targ_size_t soff;
    targ_size_t base;

    base = EBPtoESP;
    for (; c; c = code_next(c))
    {
        debug
        {
        if (0)
        {       printf("assignaddrc()\n");
                code_print(c);
        }
        // Guard against a 2-cycle in the code list.
        if (code_next(c) && code_next(code_next(c)) == c)
            assert(0);
        }

        // Determine the instruction-size table entry for this opcode.
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((c.Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c.Iop >> 8) & 0xFF];
        else if ((c.Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c.Iop & 0xFF];
        else if ((c.Iop & 0xFF) == ESCAPE)
        {
            if (c.Iop == (ESCAPE | ESCadjesp))
            {
                //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,(long)c.IEV1.Vint);
                EBPtoESP += c.IEV1.Vint;
                c.Iop = NOP;
            }
            else if (c.Iop == (ESCAPE | ESCfixesp))
            {
                //printf("fix ESP\n");
                if (hasframe)
                {
                    // LEA ESP,-EBPtoESP[EBP]
                    c.Iop = LEA;
                    if (c.Irm & 8)
                        c.Irex |= REX_R;
                    c.Irm = modregrm(2,SP,BP);
                    c.Iflags = CFoff;
                    c.IFL1 = FLconst;
                    c.IEV1.Vuns = -EBPtoESP;
                    if (enforcealign)
                    {
                        // AND ESP, -STACKALIGN
                        code *cn = code_calloc();
                        cn.Iop = 0x81;
                        cn.Irm = modregrm(3, 4, SP);
                        cn.Iflags = CFoff;
                        cn.IFL2 = FLconst;
                        cn.IEV2.Vsize_t = -STACKALIGN;
                        if (I64)
                            c.Irex |= REX_W;
                        // Splice the AND in right after the LEA.
                        cn.next = c.next;
                        c.next = cn;
                    }
                }
            }
            else if (c.Iop == (ESCAPE | ESCframeptr))
            {   // Convert to load of frame pointer
                // c.Irm is the register to use
                if (hasframe && !enforcealign)
                {   // MOV reg,EBP
                    c.Iop = 0x89;
                    if (c.Irm & 8)
                        c.Irex |= REX_B;
                    c.Irm = modregrm(3,BP,c.Irm & 7);
                }
                else
                {   // LEA reg,EBPtoESP[ESP]
                    c.Iop = LEA;
                    if (c.Irm & 8)
                        c.Irex |= REX_R;
                    c.Irm = modregrm(2,c.Irm & 7,4);
                    c.Isib = modregrm(0,4,SP);
                    c.Iflags = CFoff;
                    c.IFL1 = FLconst;
                    c.IEV1.Vuns = EBPtoESP;
                }
            }
            if (I64)
                c.Irex |= REX_W;
            continue;
        }
        else
            ins = inssize[c.Iop & 0xFF];
        if (!(ins & M) ||
            ((rm = c.Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c.Iflags))
        {
            // mod==00 has no displacement unless rm==5 (disp32) or
            // SIB with base==5.
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            // 16-bit addressing: mod==00 has no displacement unless rm==6.
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }
        s = c.IEV1.Vsym;
        switch (c.IFL1)
        {
            case FLdata:
                if (config.objfmt == OBJ_OMF && s.Sclass != SCcomdat && s.Sclass != SCextern)
                {
                    version (MARS)
                    {
                        c.IEV1.Vseg = s.Sseg;
                    }
                    else
                    {
                        c.IEV1.Vseg = DATA;
                    }
                    c.IEV1.Vpointer += s.Soffset;
                    c.IFL1 = FLdatseg;
                }
                else
                    c.IFL1 = FLextern;
                goto do2;

            case FLudata:
                if (config.objfmt == OBJ_OMF)
                {
                    version (MARS)
                    {
                        c.IEV1.Vseg = s.Sseg;
                    }
                    else
                    {
                        c.IEV1.Vseg = UDATA;
                    }
                    c.IEV1.Vpointer += s.Soffset;
                    c.IFL1 = FLdatseg;
                }
                else
                    c.IFL1 = FLextern;
                goto do2;

            case FLtlsdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                    c.IFL1 = FLextern;
                goto do2;

            case FLdatseg:
                //c.IEV1.Vseg = DATA;
                goto do2;

            case FLfardata:
            case FLcsdata:
            case FLpseudo:
                goto do2;

            case FLstack:
                //printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n",
                //s.Soffset,EBPtoESP,base,c.IEV1.Vpointer);
                c.IEV1.Vpointer += s.Soffset + EBPtoESP - base - EEStack.offset;
                break;

            case FLfast:
                soff = Fast.size;
                goto L1;

            case FLreg:
            case FLauto:
                soff = Auto.size;
            L1:
                if (Symbol_Sisdead(s, anyiasm))
                {
                    c.Iop = NOP;               // remove references to it
                    continue;
                }
                // A register variable with Vpointer 0/1 selects the
                // low/high byte register directly.
                if (s.Sfl == FLreg && c.IEV1.Vpointer < 2)
                {
                    reg_t reg = s.Sreglsw;

                    assert(!(s.Sregm & ~mask(reg)));
                    if (c.IEV1.Vpointer == 1)
                    {
                        assert(reg < 4);    /* must be a BYTEREGS   */
                        reg |= 4;           /* convert to high byte reg */
                    }
                    if (reg & 8)
                    {
                        assert(I64);
                        c.Irex |= REX_B;
                        reg &= 7;
                    }
                    c.Irm = (c.Irm & modregrm(0,7,0))
                            | modregrm(3,0,reg);
                    assert(c.Iop != LES && c.Iop != LEA);
                    goto do2;
                }
                else
                {   c.IEV1.Vpointer += s.Soffset + soff + BPoff;
                    if (s.Sflags & SFLunambig)
                        c.Iflags |= CFunambig;
            L2:
                    if (!hasframe || (enforcealign && c.IFL1 != FLpara))
                    {   /* Convert to ESP relative address instead of EBP */
                        assert(!I16);
                        c.IEV1.Vpointer += EBPtoESP;
                        ubyte crm = c.Irm;
                        if ((crm & 7) == 4)              // if SIB byte
                        {
                            assert((c.Isib & 7) == BP);
                            assert((crm & 0xC0) != 0);
                            c.Isib = (c.Isib & ~7) | modregrm(0,0,SP);
                        }
                        else
                        {
                            assert((crm & 7) == 5);
                            c.Irm = (crm & modregrm(0,7,0))
                                    | modregrm(2,0,4);
                            c.Isib = modregrm(0,4,SP);
                        }
                    }
                }
                break;

            case FLpara:
                //printf("s = %s, Soffset = %d, Para.size = %d, BPoff = %d, EBPtoESP = %d\n", s.Sident.ptr, s.Soffset, Para.size, BPoff, EBPtoESP);
                soff = Para.size - BPoff;    // cancel out add of BPoff
                goto L1;

            case FLfltreg:
                c.IEV1.Vpointer += Foff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLallocatmp:
                c.IEV1.Vpointer += Alloca.offset + BPoff;
                goto L2;

            case FLfuncarg:
                c.IEV1.Vpointer += cgstate.funcarg.offset + BPoff;
                goto L2;

            case FLbprel:
                c.IEV1.Vpointer += s.Soffset;
                break;

            case FLcs:
                sn = c.IEV1.Vuns;
                if (!CSE.loaded(sn))            // if never loaded
                {
                    c.Iop = NOP;
                    continue;
                }
                c.IEV1.Vpointer = CSE.offset(sn) + CSoff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLregsave:
                sn = c.IEV1.Vuns;
                c.IEV1.Vpointer = sn + regsave.off + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLndp:
                version (MARS)
                {
                    assert(c.IEV1.Vuns < global87.save.length);
                }
                c.IEV1.Vpointer = c.IEV1.Vuns * tysize(TYldouble) + NDPoff + BPoff;
                c.Iflags |= CFunambig;
                goto L2;

            case FLoffset:
                break;

            case FLlocalsize:
                c.IEV1.Vpointer += localsize;
                break;

            case FLconst:
            default:
                goto do2;
        }
        c.IFL1 = FLconst;
    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T)) goto done;              /* if no second operand */
        s = c.IEV2.Vsym;
        switch (c.IFL2)
        {
            case FLdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                else
                {
                    if (s.Sclass == SCcomdat)
                    {   c.IFL2 = FLextern;
                        goto do2;
                    }
                    c.IEV2.Vseg = MARS ? s.Sseg : DATA;
                    c.IEV2.Vpointer += s.Soffset;
                    c.IFL2 = FLdatseg;
                    goto done;
                }

            case FLudata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                else
                {
                    c.IEV2.Vseg = MARS ? s.Sseg : UDATA;
                    c.IEV2.Vpointer += s.Soffset;
                    c.IFL2 = FLdatseg;
                    goto done;
                }

            case FLtlsdata:
                if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH)
                {
                    c.IFL2 = FLextern;
                    goto do2;
                }
                goto done;

            case FLdatseg:
                //c.IEV2.Vseg = DATA;
                goto done;

            case FLcsdata:
            case FLfardata:
                goto done;

            case FLreg:
            case FLpseudo:
                assert(0);
                /* NOTREACHED */

            case FLfast:
                c.IEV2.Vpointer += s.Soffset + Fast.size + BPoff;
                break;

            case FLauto:
                c.IEV2.Vpointer += s.Soffset + Auto.size + BPoff;
            L3:
                if (!hasframe || (enforcealign && c.IFL2 != FLpara))
                    /* Convert to ESP relative address instead of EBP */
                    c.IEV2.Vpointer += EBPtoESP;
                break;

            case FLpara:
                c.IEV2.Vpointer += s.Soffset + Para.size;
                goto L3;

            case FLfltreg:
                c.IEV2.Vpointer += Foff + BPoff;
                goto L3;

            case FLallocatmp:
                c.IEV2.Vpointer += Alloca.offset + BPoff;
                goto L3;

            case FLfuncarg:
                c.IEV2.Vpointer += cgstate.funcarg.offset + BPoff;
                goto L3;

            case FLbprel:
                c.IEV2.Vpointer += s.Soffset;
                break;

            case FLstack:
                c.IEV2.Vpointer += s.Soffset + EBPtoESP - base;
                break;

            case FLcs:
            case FLndp:
            case FLregsave:
                assert(0);

            case FLconst:
                break;

            case FLlocalsize:
                c.IEV2.Vpointer += localsize;
                break;

            default:
                goto done;
        }
        c.IFL2 = FLconst;
    done:
        { }
    }
}

/*******************************
 * Return offset from BP of symbol s.
5605 */ 5606 5607 targ_size_t cod3_bpoffset(Symbol *s) 5608 { 5609 targ_size_t offset; 5610 5611 symbol_debug(s); 5612 offset = s.Soffset; 5613 switch (s.Sfl) 5614 { 5615 case FLpara: 5616 offset += Para.size; 5617 break; 5618 5619 case FLfast: 5620 offset += Fast.size + BPoff; 5621 break; 5622 5623 case FLauto: 5624 offset += Auto.size + BPoff; 5625 break; 5626 5627 default: 5628 WRFL(cast(FL)s.Sfl); 5629 symbol_print(s); 5630 assert(0); 5631 } 5632 assert(hasframe); 5633 return offset; 5634 } 5635 5636 5637 /******************************* 5638 * Find shorter versions of the same instructions. 5639 * Does these optimizations: 5640 * replaces jmps to the next instruction with NOPs 5641 * sign extension of modregrm displacement 5642 * sign extension of immediate data (can't do it for OR, AND, XOR 5643 * as the opcodes are not defined) 5644 * short versions for AX EA 5645 * short versions for reg EA 5646 * Code is neither removed nor added. 5647 * Params: 5648 * b = block for code (or null) 5649 * c = code list to optimize 5650 */ 5651 5652 void pinholeopt(code *c,block *b) 5653 { 5654 targ_size_t a; 5655 uint mod; 5656 ubyte ins; 5657 int usespace; 5658 int useopsize; 5659 int space; 5660 block *bn; 5661 5662 debug 5663 { 5664 __gshared int tested; if (!tested) { tested++; pinholeopt_unittest(); } 5665 } 5666 5667 debug 5668 { 5669 code *cstart = c; 5670 if (debugc) 5671 { 5672 printf("+pinholeopt(%p)\n",c); 5673 } 5674 } 5675 5676 if (b) 5677 { 5678 bn = b.Bnext; 5679 usespace = (config.flags4 & CFG4space && b.BC != BCasm); 5680 useopsize = (I16 || (config.flags4 & CFG4space && b.BC != BCasm)); 5681 } 5682 else 5683 { 5684 bn = null; 5685 usespace = (config.flags4 & CFG4space); 5686 useopsize = (I16 || config.flags4 & CFG4space); 5687 } 5688 for (; c; c = code_next(c)) 5689 { 5690 L1: 5691 opcode_t op = c.Iop; 5692 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 5693 ins = vex_inssize(c); 5694 else if ((op & 0xFFFD00) == 0x0F3800) 5695 ins = inssize2[(op >> 8) & 
0xFF]; 5696 else if ((op & 0xFF00) == 0x0F00) 5697 ins = inssize2[op & 0xFF]; 5698 else 5699 ins = inssize[op & 0xFF]; 5700 if (ins & M) // if modregrm byte 5701 { 5702 int shortop = (c.Iflags & CFopsize) ? !I16 : I16; 5703 int local_BPRM = BPRM; 5704 5705 if (c.Iflags & CFaddrsize) 5706 local_BPRM ^= 5 ^ 6; // toggle between 5 and 6 5707 5708 uint rm = c.Irm; 5709 reg_t reg = rm & modregrm(0,7,0); // isolate reg field 5710 reg_t ereg = rm & 7; 5711 //printf("c = %p, op = %02x rm = %02x\n", c, op, rm); 5712 5713 /* If immediate second operand */ 5714 if ((ins & T || 5715 ((op == 0xF6 || op == 0xF7) && (reg < modregrm(0,2,0) || reg > modregrm(0,3,0))) 5716 ) && 5717 c.IFL2 == FLconst) 5718 { 5719 int flags = c.Iflags & CFpsw; /* if want result in flags */ 5720 targ_long u = c.IEV2.Vuns; 5721 if (ins & E) 5722 u = cast(byte) u; 5723 else if (shortop) 5724 u = cast(short) u; 5725 5726 // Replace CMP reg,0 with TEST reg,reg 5727 if ((op & 0xFE) == 0x80 && // 80 is CMP R8,imm8; 81 is CMP reg,imm 5728 rm >= modregrm(3,7,AX) && 5729 u == 0) 5730 { 5731 c.Iop = (op & 1) | 0x84; 5732 c.Irm = modregrm(3,ereg,ereg); 5733 if (c.Irex & REX_B) 5734 c.Irex |= REX_R; 5735 goto L1; 5736 } 5737 5738 /* Optimize ANDs with an immediate constant */ 5739 if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0)) 5740 { 5741 if (rm >= modregrm(3,4,AX)) // AND reg,imm 5742 { 5743 if (u == 0) 5744 { 5745 /* Replace with XOR reg,reg */ 5746 c.Iop = 0x30 | (op & 1); 5747 c.Irm = modregrm(3,ereg,ereg); 5748 if (c.Irex & REX_B) 5749 c.Irex |= REX_R; 5750 goto L1; 5751 } 5752 if (u == 0xFFFFFFFF && !flags) 5753 { 5754 c.Iop = NOP; 5755 goto L1; 5756 } 5757 } 5758 if (op == 0x81 && !flags) 5759 { // If we can do the operation in one byte 5760 5761 // If EA is not SI or DI 5762 if ((rm < modregrm(3,4,SP) || I64) && 5763 (config.flags4 & CFG4space || 5764 config.target_cpu < TARGET_PentiumPro) 5765 ) 5766 { 5767 if ((u & 0xFFFFFF00) == 0xFFFFFF00) 5768 goto L2; 5769 else if (rm < modregrm(3,0,0) || 
(!c.Irex && ereg < 4)) 5770 { 5771 if (!shortop) 5772 { 5773 if ((u & 0xFFFF00FF) == 0xFFFF00FF) 5774 goto L3; 5775 } 5776 else 5777 { 5778 if ((u & 0xFF) == 0xFF) 5779 goto L3; 5780 } 5781 } 5782 } 5783 if (!shortop && useopsize) 5784 { 5785 if ((u & 0xFFFF0000) == 0xFFFF0000) 5786 { 5787 c.Iflags ^= CFopsize; 5788 goto L1; 5789 } 5790 if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX)) 5791 { 5792 c.IEV1.Voffset += 2; /* address MSW */ 5793 c.IEV2.Vuns >>= 16; 5794 c.Iflags ^= CFopsize; 5795 goto L1; 5796 } 5797 if (rm >= modregrm(3,4,AX)) 5798 { 5799 if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64)) 5800 { 5801 c.Iop = MOVZXb; // MOVZX 5802 c.Irm = modregrm(3,ereg,ereg); 5803 if (c.Irex & REX_B) 5804 c.Irex |= REX_R; 5805 goto L1; 5806 } 5807 if (u == 0xFFFF) 5808 { 5809 c.Iop = MOVZXw; // MOVZX 5810 c.Irm = modregrm(3,ereg,ereg); 5811 if (c.Irex & REX_B) 5812 c.Irex |= REX_R; 5813 goto L1; 5814 } 5815 } 5816 } 5817 } 5818 } 5819 5820 /* Look for ADD,OR,SUB,XOR with u that we can eliminate */ 5821 if (!flags && 5822 (op == 0x81 || op == 0x80) && 5823 (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || // ADD,OR 5824 reg == modregrm(0,5,0) || reg == modregrm(0,6,0)) // SUB, XOR 5825 ) 5826 { 5827 if (u == 0) 5828 { 5829 c.Iop = NOP; 5830 goto L1; 5831 } 5832 if (u == ~0 && reg == modregrm(0,6,0)) /* XOR */ 5833 { 5834 c.Iop = 0xF6 | (op & 1); /* NOT */ 5835 c.Irm ^= modregrm(0,6^2,0); 5836 goto L1; 5837 } 5838 if (!shortop && 5839 useopsize && 5840 op == 0x81 && 5841 (u & 0xFFFF0000) == 0 && 5842 (reg == modregrm(0,6,0) || reg == modregrm(0,1,0))) 5843 { 5844 c.Iflags ^= CFopsize; 5845 goto L1; 5846 } 5847 } 5848 5849 /* Look for TEST or OR or XOR with an immediate constant */ 5850 /* that we can replace with a byte operation */ 5851 if (op == 0xF7 && reg == modregrm(0,0,0) || 5852 op == 0x81 && reg == modregrm(0,6,0) && !flags || 5853 op == 0x81 && reg == modregrm(0,1,0)) 5854 { 5855 // See if we can replace a dword with a word 5856 // (avoid for 32 bit 
instructions, because CFopsize 5857 // is too slow) 5858 if (!shortop && useopsize) 5859 { 5860 if ((u & 0xFFFF0000) == 0) 5861 { 5862 c.Iflags ^= CFopsize; 5863 goto L1; 5864 } 5865 /* If memory (not register) addressing mode */ 5866 if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX)) 5867 { 5868 c.IEV1.Voffset += 2; /* address MSW */ 5869 c.IEV2.Vuns >>= 16; 5870 c.Iflags ^= CFopsize; 5871 goto L1; 5872 } 5873 } 5874 5875 // If EA is not SI or DI 5876 if (rm < (modregrm(3,0,SP) | reg) && 5877 (usespace || 5878 config.target_cpu < TARGET_PentiumPro) 5879 ) 5880 { 5881 if ((u & 0xFFFFFF00) == 0) 5882 { 5883 L2: c.Iop--; /* to byte instruction */ 5884 c.Iflags &= ~CFopsize; 5885 goto L1; 5886 } 5887 if (((u & 0xFFFF00FF) == 0 || 5888 (shortop && (u & 0xFF) == 0)) && 5889 (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4))) 5890 { 5891 L3: 5892 c.IEV2.Vuns >>= 8; 5893 if (rm >= (modregrm(3,0,AX) | reg)) 5894 c.Irm |= 4; /* AX.AH, BX.BH, etc. */ 5895 else 5896 c.IEV1.Voffset += 1; 5897 goto L2; 5898 } 5899 } 5900 5901 // BUG: which is right? 
5902 //else if ((u & 0xFFFF0000) == 0) 5903 5904 else if (0 && op == 0xF7 && 5905 rm >= modregrm(3,0,SP) && 5906 (u & 0xFFFF0000) == 0) 5907 5908 c.Iflags &= ~CFopsize; 5909 } 5910 5911 // Try to replace TEST reg,-1 with TEST reg,reg 5912 if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8 5913 { 5914 if ((u & 0xFF) == 0xFF) 5915 { 5916 L4: 5917 c.Iop = 0x84; // TEST regL,regL 5918 c.Irm = modregrm(3,ereg,ereg); 5919 if (c.Irex & REX_B) 5920 c.Irex |= REX_R; 5921 c.Iflags &= ~CFopsize; 5922 goto L1; 5923 } 5924 } 5925 if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4)) 5926 { 5927 if (u == 0xFF) 5928 { 5929 if (ereg & 4) // SIL,DIL,BPL,SPL need REX prefix 5930 c.Irex |= REX; 5931 goto L4; 5932 } 5933 if ((u & 0xFFFF) == 0xFF00 && shortop && !c.Irex && ereg < 4) 5934 { 5935 ereg |= 4; /* to regH */ 5936 goto L4; 5937 } 5938 } 5939 5940 /* Look for sign extended immediate data */ 5941 if (cast(byte) u == u) 5942 { 5943 if (op == 0x81) 5944 { 5945 if (reg != 0x08 && reg != 0x20 && reg != 0x30) 5946 c.Iop = op = 0x83; /* 8 bit sgn ext */ 5947 } 5948 else if (op == 0x69) /* IMUL rw,ew,dw */ 5949 c.Iop = op = 0x6B; /* IMUL rw,ew,db */ 5950 } 5951 5952 // Look for SHIFT EA,imm8 we can replace with short form 5953 if (u == 1 && ((op & 0xFE) == 0xC0)) 5954 c.Iop |= 0xD0; 5955 5956 } /* if immediate second operand */ 5957 5958 /* Look for AX short form */ 5959 if (ins & A) 5960 { 5961 if (rm == modregrm(0,AX,local_BPRM) && 5962 !(c.Irex & REX_R) && // and it's AX, not R8 5963 (op & ~3) == 0x88 && 5964 !I64) 5965 { 5966 op = ((op & 3) + 0xA0) ^ 2; 5967 /* 8A. A0 */ 5968 /* 8B. A1 */ 5969 /* 88. A2 */ 5970 /* 89. 
A3 */ 5971 c.Iop = op; 5972 c.IFL2 = c.IFL1; 5973 c.IEV2 = c.IEV1; 5974 } 5975 5976 /* Replace MOV REG1,REG2 with MOV EREG1,EREG2 */ 5977 else if (!I16 && 5978 (op == 0x89 || op == 0x8B) && 5979 (rm & 0xC0) == 0xC0 && 5980 (!b || b.BC != BCasm) 5981 ) 5982 c.Iflags &= ~CFopsize; 5983 5984 // If rm is AX 5985 else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c.Irex & (REX_R | REX_B))) 5986 { 5987 switch (op) 5988 { 5989 case 0x80: op = reg | 4; break; 5990 case 0x81: op = reg | 5; break; 5991 case 0x87: op = 0x90 + (reg>>3); break; // XCHG 5992 5993 case 0xF6: 5994 if (reg == 0) 5995 op = 0xA8; /* TEST AL,immed8 */ 5996 break; 5997 5998 case 0xF7: 5999 if (reg == 0) 6000 op = 0xA9; /* TEST AX,immed16 */ 6001 break; 6002 6003 default: 6004 break; 6005 } 6006 c.Iop = op; 6007 } 6008 } 6009 6010 /* Look for reg short form */ 6011 if ((ins & R) && (rm & 0xC0) == 0xC0) 6012 { 6013 switch (op) 6014 { 6015 case 0xC6: op = 0xB0 + ereg; break; 6016 case 0xC7: // if no sign extension 6017 if (!(c.Irex & REX_W && c.IEV2.Vint < 0)) 6018 { 6019 c.Irm = 0; 6020 c.Irex &= ~REX_W; 6021 op = 0xB8 + ereg; 6022 } 6023 break; 6024 6025 case 0xFF: 6026 switch (reg) 6027 { case 6<<3: op = 0x50+ereg; break;/* PUSH*/ 6028 case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/ 6029 case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/ 6030 default: break; 6031 } 6032 break; 6033 6034 case 0x8F: op = 0x58 + ereg; break; 6035 case 0x87: 6036 if (reg == 0 && !(c.Irex & (REX_R | REX_B))) // Issue 12968: Needed to ensure it's referencing RAX, not R8 6037 op = 0x90 + ereg; 6038 break; 6039 6040 default: 6041 break; 6042 } 6043 c.Iop = op; 6044 } 6045 6046 // Look to remove redundant REX prefix on XOR 6047 if (c.Irex == REX_W // ignore ops involving R8..R15 6048 && (op == 0x31 || op == 0x33) // XOR 6049 && ((rm & 0xC0) == 0xC0) // register direct 6050 && ((reg >> 3) == ereg)) // register with itself 6051 { 6052 c.Irex = 0; 6053 } 6054 6055 // Look to replace SHL reg,1 with ADD reg,reg 6056 if 
((op & ~1) == 0xD0 && 6057 (rm & modregrm(3,7,0)) == modregrm(3,4,0) && 6058 config.target_cpu >= TARGET_80486) 6059 { 6060 c.Iop &= 1; 6061 c.Irm = cast(ubyte)((rm & modregrm(3,0,7)) | (ereg << 3)); 6062 if (c.Irex & REX_B) 6063 c.Irex |= REX_R; 6064 if (!(c.Iflags & CFpsw) && !I16) 6065 c.Iflags &= ~CFopsize; 6066 goto L1; 6067 } 6068 6069 /* Look for sign extended modregrm displacement, or 0 6070 * displacement. 6071 */ 6072 6073 if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp 6074 c.IFL1 == FLconst) // and it's a constant 6075 { 6076 a = c.IEV1.Vpointer; 6077 if (a == 0 && (rm & 7) != local_BPRM && // if 0[disp] 6078 !(local_BPRM == 5 && (rm & 7) == 4 && (c.Isib & 7) == BP) 6079 ) 6080 c.Irm &= 0x3F; 6081 else if (!I16) 6082 { 6083 if (cast(targ_size_t)cast(targ_schar)a == a) 6084 c.Irm ^= 0xC0; /* do 8 sx */ 6085 } 6086 else if ((cast(targ_size_t)cast(targ_schar)a & 0xFFFF) == (a & 0xFFFF)) 6087 c.Irm ^= 0xC0; /* do 8 sx */ 6088 } 6089 6090 /* Look for LEA reg,[ireg], replace with MOV reg,ireg */ 6091 if (op == LEA) 6092 { 6093 rm = c.Irm & 7; 6094 mod = c.Irm & modregrm(3,0,0); 6095 if (mod == 0) 6096 { 6097 if (!I16) 6098 { 6099 switch (rm) 6100 { 6101 case 4: 6102 case 5: 6103 break; 6104 6105 default: 6106 c.Irm |= modregrm(3,0,0); 6107 c.Iop = 0x8B; 6108 break; 6109 } 6110 } 6111 else 6112 { 6113 switch (rm) 6114 { 6115 case 4: rm = modregrm(3,0,SI); goto L6; 6116 case 5: rm = modregrm(3,0,DI); goto L6; 6117 case 7: rm = modregrm(3,0,BX); goto L6; 6118 L6: c.Irm = cast(ubyte)(rm + reg); 6119 c.Iop = 0x8B; 6120 break; 6121 6122 default: 6123 break; 6124 } 6125 } 6126 } 6127 6128 /* replace LEA reg,0[BP] with MOV reg,BP */ 6129 else if (mod == modregrm(1,0,0) && rm == local_BPRM && 6130 c.IFL1 == FLconst && c.IEV1.Vpointer == 0) 6131 { 6132 c.Iop = 0x8B; /* MOV reg,BP */ 6133 c.Irm = cast(ubyte)(modregrm(3,0,BP) + reg); 6134 } 6135 } 6136 6137 // Replace [R13] with 0[R13] 6138 if (c.Irex & REX_B && ((c.Irm & modregrm(3,0,7)) == modregrm(0,0,BP) || 
6139 issib(c.Irm) && (c.Irm & modregrm(3,0,0)) == 0 && (c.Isib & 7) == BP)) 6140 { 6141 c.Irm |= modregrm(1,0,0); 6142 c.IFL1 = FLconst; 6143 c.IEV1.Vpointer = 0; 6144 } 6145 } 6146 else if (!(c.Iflags & CFvex)) 6147 { 6148 switch (op) 6149 { 6150 default: 6151 // Look for MOV r64, immediate 6152 if ((c.Irex & REX_W) && (op & ~7) == 0xB8) 6153 { 6154 /* Look for zero extended immediate data */ 6155 if (c.IEV2.Vsize_t == c.IEV2.Vuns) 6156 { 6157 c.Irex &= ~REX_W; 6158 } 6159 /* Look for sign extended immediate data */ 6160 else if (c.IEV2.Vsize_t == c.IEV2.Vint) 6161 { 6162 c.Irm = modregrm(3,0,op & 7); 6163 c.Iop = op = 0xC7; 6164 c.IEV2.Vsize_t = c.IEV2.Vuns; 6165 } 6166 } 6167 if ((op & ~0x0F) != 0x70) 6168 break; 6169 goto case JMP; 6170 6171 case JMP: 6172 switch (c.IFL2) 6173 { 6174 case FLcode: 6175 if (c.IEV2.Vcode == code_next(c)) 6176 { 6177 c.Iop = NOP; 6178 continue; 6179 } 6180 break; 6181 6182 case FLblock: 6183 if (!code_next(c) && c.IEV2.Vblock == bn) 6184 { 6185 c.Iop = NOP; 6186 continue; 6187 } 6188 break; 6189 6190 case FLconst: 6191 case FLfunc: 6192 case FLextern: 6193 break; 6194 6195 default: 6196 WRFL(cast(FL)c.IFL2); 6197 assert(0); 6198 } 6199 break; 6200 6201 case 0x68: // PUSH immed16 6202 if (c.IFL2 == FLconst) 6203 { 6204 targ_long u = c.IEV2.Vuns; 6205 if (I64 || 6206 ((c.Iflags & CFopsize) ? 
I16 : I32))
                        {   // PUSH 32/64 bit operand
                            if (u == cast(byte) u)
                                c.Iop = 0x6A;           // PUSH immed8
                        }
                        else // PUSH 16 bit operand
                        {
                            if (cast(short)u == cast(byte) u)
                                c.Iop = 0x6A;           // PUSH immed8
                        }
                    }
                    break;
            }
        }
    }

    debug
    if (debugc)
    {
        printf("-pinholeopt(%p)\n",cstart);
        for (c = cstart; c; c = code_next(c))
            code_print(c);
    }
}


debug
{
/****************************************
 * Unit tests for pinholeopt().
 * Each test row is a pair { input, expected output }: a code struct is
 * synthesized from the input fields, run through pinholeopt(), and the
 * result is asserted equal to the expected fields.
 * A row's `model` of 16/32/64 restricts it to that memory model
 * (0 = run in any model).
 */
private void pinholeopt_unittest()
{
    //printf("pinholeopt_unittest()\n");
    static struct CS
    {
        uint model,op,ea;       // memory model, opcode, effective address bytes
        targ_size_t ev1,ev2;    // operand 1 and operand 2 values
        uint flags;             // CFxxx flags
    }
    __gshared CS[2][22] tests =
    [
        // XOR reg,immed  NOT regL
        [ { 16,0x81,modregrm(3,6,BX),0,0xFF,0 }, { 0,0xF6,modregrm(3,2,BX),0,0xFF } ],

        // MOV 0[BX],3  MOV [BX],3
        [ { 16,0xC7,modregrm(2,0,7),0,3 }, { 0,0xC7,modregrm(0,0,7),0,3 } ],

        /+ // only if config.flags4 & CFG4space
        // TEST regL,immed8
        [ { 0,0xF6,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }],
        [ { 0,0xF7,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }],
        [ { 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }],
        [ { 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }],
        +/

        // PUSH immed => PUSH immed8
        [ { 0,0x68,0,0,0 }, { 0,0x6A,0,0,0 }],
        [ { 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }],
        [ { 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }],
        [ { 16,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }],
        [ { 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }],
        [ { 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }],
        [ { 16,0x68,0,0,0x10000,0 }, { 0,0x6A,0,0,0x10000,0 }],
        [ { 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }],
        [ { 32,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }],
        [ { 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }],
        [ { 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }],
        [ { 32,0x68,0,0,0x10000,CFopsize }, { 0,0x6A,0,0,0x10000,CFopsize }],
        [ { 32,0x68,0,0,0x8000,CFopsize }, { 0,0x68,0,0,0x8000,CFopsize }],

        // clear r64, for r64 != R8..R15
        [ { 64,0x31,0x800C0,0,0,0 }, { 0,0x31,0xC0,0,0,0}],
        [ { 64,0x33,0x800C0,0,0,0 }, { 0,0x33,0xC0,0,0,0}],

        // MOV r64, immed
        [ { 64,0xC7,0x800C0,0,0xFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,0xFFFFFFFF,0}],
        [ { 64,0xC7,0x800C0,0,0x7FFFFFFF,0 }, { 0,0xB8,0,0,0x7FFFFFFF,0}],
        [ { 64,0xB8,0x80000,0,0xFFFFFFFF,0 }, { 0,0xB8,0,0,0xFFFFFFFF,0 }],
        [ { 64,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }, { 0,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }],
        [ { 64,0xB8,0x80000,0,cast(targ_size_t)0xFFFFFFFFFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,cast(targ_size_t)0xFFFFFFFF,0}],
    ];

    //config.flags4 |= CFG4space;
    for (int i = 0; i < tests.length; i++)
    {   CS *pin  = &tests[i][0];        // input instruction
        CS *pout = &tests[i][1];        // expected result
        code cs = void;
        memset(&cs, 0, cs.sizeof);
        if (pin.model)
        {
            // skip tests that don't apply to the current memory model
            if (I16 && pin.model != 16)
                continue;
            if (I32 && pin.model != 32)
                continue;
            if (I64 && pin.model != 64)
                continue;
        }
        //printf("[%d]\n", i);
        cs.Iop = pin.op;
        cs.Iea = pin.ea;
        cs.IFL1 = FLconst;
        cs.IFL2 = FLconst;
        cs.IEV1.Vsize_t = pin.ev1;
        cs.IEV2.Vsize_t = pin.ev2;
        cs.Iflags = pin.flags;
        pinholeopt(&cs, null);
        if (cs.Iop != pout.op)
        {   printf("[%d] Iop = x%02x, pout = x%02x\n", i, cs.Iop, pout.op);
            assert(0);
        }
        assert(cs.Iea == pout.ea);
        assert(cs.IEV1.Vsize_t == pout.ev1);
        assert(cs.IEV2.Vsize_t == pout.ev2);
        assert(cs.Iflags == pout.flags);
    }
}
}

/**************************
 * If an optimized build and a register already holds the immediate
 * operand value of an 0x80/0x81 (group 1 immediate) instruction,
 * rewrite the instruction to use that register instead of the immediate,
 * producing a shorter encoding.
 * Params:
 *      c = instruction to (possibly) rewrite in place
 */
void simplify_code(code* c)
{
    reg_t reg;
    if (config.flags4 & CFG4optimized &&
        (c.Iop == 0x81 || c.Iop == 0x80) &&
        c.IFL2 == FLconst &&
        reghasvalue((c.Iop == 0x80) ?
BYTEREGS : ALLREGS,
            I64 ? c.IEV2.Vsize_t : c.IEV2.Vlong, &reg) &&   // NOTE(review): '&reg' was mojibake ('®') in the extracted text; restored
        !(I16 && c.Iflags & CFopsize)
       )
    {
        // See if we can replace immediate instruction with register instruction
        // regop maps the /reg field of the group-1 immediate opcode to the
        // corresponding reg,reg opcode (ADD/OR/ADC/SBB/AND/SUB/XOR/CMP)
        static immutable ubyte[8] regop =
        [ 0x00,0x08,0x10,0x18,0x20,0x28,0x30,0x38 ];

        //printf("replacing 0x%02x, val = x%lx\n",c.Iop,c.IEV2.Vlong);
        c.Iop = regop[(c.Irm & modregrm(0,7,0)) >> 3] | (c.Iop & 1);
        code_newreg(c, reg);
        // byte ops on SPL/BPL/SIL/DIL need a REX prefix in 64 bit mode
        if (I64 && !(c.Iop & 1) && (reg & 4))
            c.Irex |= REX;
    }
}

/**************************
 * Compute jump addresses for FLcode.
 * Note: only works for forward referenced code.
 *       only direct jumps and branches are detected.
 *       LOOP instructions only work for backward refs.
 * Params:
 *      c = head of the code list to fix up; IEV2/IFL2 of matching
 *          jump instructions are rewritten in place to FLconst displacements
 */
void jmpaddr(code *c)
{
    code* ci,cn,ctarg,cstart;
    targ_size_t ad;

    //printf("jmpaddr()\n");
    cstart = c;                           /* remember start of code       */
    while (c)
    {
        const op = c.Iop;
        if (op <= 0xEB &&
            inssize[op] & T &&   // if second operand
            c.IFL2 == FLcode &&
            ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ || op == CALL))
        {
            ci = code_next(c);
            ctarg = c.IEV2.Vcode;         /* target code                  */
            ad = 0;                       /* IP displacement              */
            // sum up the sizes of the instructions between here and the target
            while (ci && ci != ctarg)
            {
                ad += calccodsize(ci);
                ci = code_next(ci);
            }
            if (!ci)
                goto Lbackjmp;            // couldn't find it
            if (!I16 || op == JMP || op == JMPS || op == JCXZ || op == CALL)
                c.IEV2.Vpointer = ad;
            else                          /* else conditional             */
            {
                if (!(c.Iflags & CFjmp16))  /* if branch    */
                    c.IEV2.Vpointer = ad;
                else            /* branch around a long jump    */
                {
                    // insert a new JMP after c, and invert c's condition to
                    // hop over it (16 bit code can't do a long conditional jump)
                    cn = code_next(c);
                    c.next = code_calloc();
                    code_next(c).next = cn;
                    c.Iop = op ^ 1;       /* converse jmp */
                    c.Iflags &= ~CFjmp16;
                    c.IEV2.Vpointer = I16 ?
 3 : 5;
                    cn = code_next(c);
                    cn.Iop = JMP;         /* long jump    */
                    cn.IFL2 = FLconst;
                    cn.IEV2.Vpointer = ad;
                }
            }
            c.IFL2 = FLconst;
        }
        if (op == LOOP && c.IFL2 == FLcode) /* backwards refs */
        {
          Lbackjmp:
            ctarg = c.IEV2.Vcode;
            // verify target precedes c in the list
            for (ci = cstart; ci != ctarg; ci = code_next(ci))
                if (!ci || ci == c)
                    assert(0);
            ad = 2;                 /* - IP displacement            */
            while (ci != c)
            {
                assert(ci);
                ad += calccodsize(ci);
                ci = code_next(ci);
            }
            c.IEV2.Vpointer = (-ad) & 0xFF;     // 8 bit negative displacement
            c.IFL2 = FLconst;
        }
        c = code_next(c);
    }
}

/*******************************
 * Calculate bl.Bsize.
 * Params:
 *      c = head of a block's code list
 * Returns:
 *      total byte size of the instructions in the list
 */
uint calcblksize(code *c)
{
    uint size;
    for (size = 0; c; c = code_next(c))
    {
        uint sz = calccodsize(c);
        //printf("off=%02x, sz = %d, code %p: op=%02x\n", size, sz, c, c.Iop);
        size += sz;
    }
    //printf("calcblksize(c = x%x) = %d\n", c, size);
    return size;
}

/*****************************
 * Calculate and return code size of a code.
 * Note that NOPs are sometimes used as markers, but are
 * never output. LINNUMs are never output.
 * Note: This routine must be fast. Profiling shows it is significant.
 */

uint calccodsize(code *c)
{
    uint size;              // accumulated byte size of the instruction
    ubyte rm,mod,ins;
    uint iflags;
    uint i32 = I32 || I64;  // 32/64 bit operand size default
    uint a32 = i32;         // 32 bit address size default

    debug
    assert((a32 & ~1) == 0);

    iflags = c.Iflags;
    opcode_t op = c.Iop;
    //printf("calccodsize(x%08x), Iflags = x%x\n", op, iflags);
    if (iflags & CFvex && c.Ivex.pfx == 0xC4)
    {
        ins = vex_inssize(c);
        size = ins & 7;         // low 3 bits of ins is the base size
        goto Lmodrm;
    }
    else if ((op & 0xFF00) == 0x0F00 || (op & 0xFFFD00) == 0x0F3800)
        op = 0x0F;
    else
        op &= 0xFF;
    switch (op)
    {
        case 0x0F:
            if ((c.Iop & 0xFFFD00) == 0x0F3800)
            {   // 3 byte op ( 0F38-- or 0F3A-- )
                ins = inssize2[(c.Iop >> 8) & 0xFF];
                size = ins & 7;
                if (c.Iop & 0xFF000000)
                    size++;
            }
            else
            {   // 2 byte op ( 0F-- )
                ins = inssize2[c.Iop & 0xFF];
                size = ins & 7;
                if (c.Iop & 0xFF0000)
                    size++;
            }
            break;

        case 0x90:
            size = (c.Iop == PAUSE) ? 2 : 1;    // PAUSE is F3 90
            goto Lret2;

        case NOP:
        case ESCAPE:
            size = 0;                     // since these won't be output
            goto Lret2;

        case ASM:
            if (c.Iflags == CFaddrsize)   // kludge for DA inline asm
                size = _tysize[TYnptr];
            else
                size = cast(uint)c.IEV1.len;
            goto Lret2;

        case 0xA1:
        case 0xA3:
            if (c.Irex)
            {
                size = 9;               // 64 bit immediate value for MOV to/from RAX
                goto Lret;
            }
            goto Ldefault;

        case 0xF6:                      /* TEST mem8,immed8             */
            ins = inssize[op];
            size = ins & 7;
            if (i32)
                size = inssize32[op];
            if ((c.Irm & (7<<3)) == 0)  // /0 is TEST, which carries an immediate
                size++;                 /* size of immed8               */
            break;

        case 0xF7:
            ins = inssize[op];
            size = ins & 7;
            if (i32)
                size = inssize32[op];
            if ((c.Irm & (7<<3)) == 0)  // /0 is TEST, which carries an immediate
                size += (i32 ^ ((iflags & CFopsize) !=0)) ?
 4 : 2;
            break;

        default:
        Ldefault:
            ins = inssize[op];
            size = ins & 7;
            if (i32)
                size = inssize32[op];
    }

    if (iflags & (CFwait | CFopsize | CFaddrsize | CFSEG))
    {
        if (iflags & CFwait)    // if add FWAIT prefix
            size++;
        if (iflags & CFSEG)     // if segment override
            size++;

        // If the instruction has a second operand that is not an 8 bit,
        // and the operand size prefix is present, then fix the size computation
        // because the operand size will be different.
        // Walter, I had problems with this bit at the end. There can still be
        // an ADDRSIZE prefix for these and it does indeed change the operand size.

        if (iflags & (CFopsize | CFaddrsize))
        {
            if ((ins & (T|E)) == T)
            {
                if ((op & 0xAC) == 0xA0)
                {
                    if (iflags & CFaddrsize && !I64)
                    {   if (I32)
                            size -= 2;
                        else
                            size += 2;
                    }
                }
                else if (iflags & CFopsize)
                {   if (I16)
                        size += 2;
                    else
                        size -= 2;
                }
            }
            if (iflags & CFaddrsize)
            {   if (!I64)
                    a32 ^= 1;           // address size prefix toggles address mode
                size++;
            }
            if (iflags & CFopsize)
                size++;                         /* +1 for OPSIZE prefix         */
        }
    }

Lmodrm:
    if ((op & ~0x0F) == 0x70)
    {
        if (iflags & CFjmp16)           // if long branch
            size += I16 ? 3 : 4;        // + 3(4) bytes for JMP
    }
    else if (ins & M)                   // if modregrm byte
    {
        rm = c.Irm;
        mod = rm & 0xC0;
        if (a32 || I64)
        {   // 32 bit addressing
            if (issib(rm))
                size++;
            switch (mod)
            {   case 0:
                    if (issib(rm) && (c.Isib & 7) == 5 ||
                        (rm & 7) == 5)
                        size += 4;      /* disp32                       */
                    if (c.Irex & REX_B && (rm & 7) == 5)
                        /* Instead of selecting R13, this mode is an [RIP] relative
                         * address. Although valid, it's redundant, and should not
                         * be generated. Instead, generate 0[R13] instead of [R13].
                         */
                        assert(0);
                    break;

                case 0x40:
                    size++;             /* disp8                        */
                    break;

                case 0x80:
                    size += 4;          /* disp32                       */
                    break;

                default:
                    break;
            }
        }
        else
        {   // 16 bit addressing
            if (mod == 0x40)            /* 01: 8 bit displacement       */
                size++;
            else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6))
                size += 2;
        }
    }

Lret:
    if (!(iflags & CFvex) && c.Irex)
    {
        size++;                         // +1 for REX prefix
        if (c.Irex & REX_W && (op & ~7) == 0xB8)
            size += 4;                  // MOV r64,imm64 has an 8 byte immediate
    }
Lret2:
    //printf("op = x%02x, size = %d\n",op,size);
    return size;
}

/********************************
 * Return !=0 if codes match.
 */

static if (0)
{
// NOTE(review): this whole block is compiled out (static if (0)) and has
// bit-rotted: see the EV1/IEV1 typo and the undeclared rm/c uses flagged below.
// It will need repair before it can be re-enabled.
int code_match(code *c1,code *c2)
{
    code cs1,cs2;
    ubyte ins;

    if (c1 == c2)
        goto match;
    cs1 = *c1;
    cs2 = *c2;
    if (cs1.Iop != cs2.Iop)
        goto nomatch;
    switch (cs1.Iop)
    {
        case ESCAPE | ESCctor:
        case ESCAPE | ESCdtor:
            goto nomatch;

        case NOP:
            goto match;

        case ASM:
            if (cs1.IEV1.len == cs2.IEV1.len &&
                memcmp(cs1.IEV1.bytes,cs2.IEV1.bytes,cs1.EV1.len) == 0)  // NOTE(review): cs1.EV1.len looks like a typo for cs1.IEV1.len
                goto match;
            else
                goto nomatch;

        default:
            if ((cs1.Iop & 0xFF) == ESCAPE)
                goto match;
            break;
    }
    if (cs1.Iflags != cs2.Iflags)
        goto nomatch;

    ins = inssize[cs1.Iop & 0xFF];
    if ((cs1.Iop & 0xFFFD00) == 0x0F3800)
    {
        ins = inssize2[(cs1.Iop >> 8) & 0xFF];
    }
    else if ((cs1.Iop & 0xFF00) == 0x0F00)
    {
        ins = inssize2[cs1.Iop & 0xFF];
    }

    if (ins & M)                // if modregrm byte
    {
        if (cs1.Irm != cs2.Irm)
            goto nomatch;
        if ((cs1.Irm & 0xC0) == 0xC0)
            goto do2;
        if (is32bitaddr(I32,cs1.Iflags))
        {
            if (issib(cs1.Irm) && cs1.Isib != cs2.Isib)
                goto nomatch;
            if (
                // NOTE(review): 'rm' and 'c' are not declared in this function (dead code)
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand  */
        }
        else
        {
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand  */
        }
        if (cs1.IFL1 != cs2.IFL1)
            goto nomatch;
        if (flinsymtab[cs1.IFL1] && cs1.IEV1.Vsym != cs2.IEV1.Vsym)
            goto nomatch;
        if (cs1.IEV1.Voffset != cs2.IEV1.Voffset)
            goto nomatch;
    }

do2:
    if (!(ins & T))                     // if no second operand
        goto match;
    if (cs1.IFL2 != cs2.IFL2)
        goto nomatch;
    if (flinsymtab[cs1.IFL2] && cs1.IEV2.Vsym != cs2.IEV2.Vsym)
        goto nomatch;
    if (cs1.IEV2.Voffset != cs2.IEV2.Voffset)
        goto nomatch;

match:
    return 1;

nomatch:
    return 0;
}

}

/**************************
 * Write code to intermediate file.
 * Code starts at offset.
 * Returns:
 *      addr of end of code
 */

/* Small accumulation buffer that batches generated machine code bytes
 * before handing them to objmod.bytes(), to avoid per-byte object
 * module calls. `offset` tracks the flushed position in segment `seg`;
 * `index` is the count of pending bytes in `bytes`.
 */
private struct MiniCodeBuf
{
nothrow:
    size_t index;       // number of pending (unflushed) bytes in bytes[]
    size_t offset;      // segment offset of the start of bytes[]
    int seg;            // target segment
    char[100] bytes; // = void;

    this(int seg)
    {
        index = 0;
        this.offset = cast(size_t)Offset(seg);
        this.seg = seg;
    }

    void flushx()
    {
        // Emit accumulated bytes to code segment
        debug assert(index < bytes.length);
        offset += objmod.bytes(seg, offset, cast(uint)index, bytes.ptr);
        index = 0;
    }

    /// Append a single byte
    void gen(char c) { bytes[index++] = c; }

    /// Append n raw bytes from p
    void genp(size_t n, void *p) { memcpy(&bytes[index], p, n); index += n; }

    /// Emit pending bytes, if any
    void flush() { if (index) flushx(); }

    /// Current logical offset (flushed + pending)
    uint getOffset() { return cast(uint)(offset + index); }

    /// Remaining capacity before a flush is required
    uint available() { return cast(uint)(bytes.sizeof - index); }
}

private void do8bit(MiniCodeBuf *pbuf, FL, evc *);
private void do16bit(MiniCodeBuf *pbuf, FL, evc *,int);
private void do32bit(MiniCodeBuf *pbuf, FL, evc *,int,int = 0);
private void do64bit(MiniCodeBuf *pbuf, FL, evc *,int);

uint codout(int seg, code *c)
{
    ubyte rm,mod;
    ubyte ins;
    code *cn;
    uint flags;
    Symbol *s;

    debug
    if (debugc) printf("codout(%p), Coffset = x%llx\n",c,cast(ulong)Offset(seg));

    MiniCodeBuf ggen = void;
    ggen.index = 0;
    ggen.offset = cast(size_t)Offset(seg);
    ggen.seg = seg;

    for (; c; c = code_next(c))
    {
        debug
        {
            if (debugc) { printf("off=%02x, sz=%d, ",cast(int)ggen.getOffset(),cast(int)calccodsize(c)); code_print(c); }
            uint startoffset = ggen.getOffset();
        }

        opcode_t op = c.Iop;
        ins = inssize[op & 0xFF];
        // Handle pseudo-ops (ESCAPE/NOP/ASM) that emit no ordinary bytes
        switch (op & 0xFF)
        {
            case ESCAPE:
                /* Check for SSE4 opcode v/pmaxuw xmm1,xmm2/m128 */
                if(op == 0x660F383E || c.Iflags & CFvex) break;

                switch (op & 0xFFFF00)
                {   case ESClinnum:
                        /* put out line number stuff    */
                        objmod.linnum(c.IEV1.Vsrcpos,seg,ggen.getOffset());
                        break;
version (SCPP)
{
static if (1)
{
                    case ESCctor:
                    case ESCdtor:
                    case ESCoffset:
                        if (config.exe != EX_WIN32)
                            except_pair_setoffset(c,ggen.getOffset() - funcoffset);
                        break;

                    case ESCmark:
                    case ESCrelease:
                    case ESCmark2:
                    case ESCrelease2:
                        break;
}
else
{
                    case ESCctor:
                        except_push(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null);
                        break;

                    case ESCdtor:
                        except_pop(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null);
                        break;

                    case ESCmark:
                        except_mark();
                        break;

                    case ESCrelease:
                        except_release();
                        break;
}
}
                    case ESCadjesp:
                        //printf("adjust ESP %ld\n", (long)c.IEV1.Vint);
                        break;

                    default:
                        break;
                }

                debug
                assert(calccodsize(c) == 0);

                continue;

            case NOP:                   /* don't send them out          */
                if (op != NOP)
                    break;
                debug
                assert(calccodsize(c) == 0);

                continue;

            case ASM:
                if (op != ASM)
                    break;
                ggen.flush();
                if (c.Iflags == CFaddrsize)    // kludge for DA inline asm
                {
                    do32bit(&ggen, FLblockoff,&c.IEV1,0);
                }
                else
                {
                    ggen.offset += objmod.bytes(seg,ggen.offset,cast(uint)c.IEV1.len,c.IEV1.bytes);
                }
                debug
                assert(calccodsize(c) == c.IEV1.len);

                continue;

            default:
                break;
        }
        flags = c.Iflags;

        // See if we need to flush (don't have room for largest code sequence)
        if (ggen.available() < (1+4+4+8+8))
            ggen.flush();

        // see if we need to put out prefix bytes
        if (flags & (CFwait | CFPREFIX | CFjmp16))
        {
            int override_;

            if (flags & CFwait)
                ggen.gen(0x9B);                      // FWAIT
                                                /* ? SEGES : SEGSS      */
            switch (flags & CFSEG)
            {   case CFes:   override_ = SEGES;  goto segover;
                case CFss:   override_ = SEGSS;  goto segover;
                case CFcs:   override_ = SEGCS;  goto segover;
                case CFds:   override_ = SEGDS;  goto segover;
                case CFfs:   override_ = SEGFS;  goto segover;
                case CFgs:   override_ = SEGGS;  goto segover;
                segover:     ggen.gen(cast(ubyte)override_);
                             break;

                default:     break;
            }

            if (flags & CFaddrsize)
                ggen.gen(0x67);         // address size prefix

            // Do this last because of instructions like ADDPD
            if (flags & CFopsize)
                ggen.gen(0x66);                 /* operand size         */

            if ((op & ~0x0F) == 0x70 && flags & CFjmp16) /* long condit jmp */
            {
                if (!I16)
                {   // Put out 16 bit conditional jump
                    c.Iop = op = 0x0F00 | (0x80 | (op & 0x0F));
                }
                else
                {
                    // 16 bit code: emit inverted short Jcc around a long JMP
                    cn = code_calloc();
                    /*cxcalloc++;*/
                    cn.next = code_next(c);
                    c.next= cn;         // link into code
                    cn.Iop = JMP;              // JMP block
                    cn.IFL2 = c.IFL2;
                    cn.IEV2.Vblock = c.IEV2.Vblock;
                    c.Iop = op ^= 1;           // toggle condition
                    c.IFL2 = FLconst;
                    c.IEV2.Vpointer = I16 ?
 3 : 5; // skip over JMP block
                    c.Iflags &= ~CFjmp16;
                }
            }
        }

        if (flags & CFvex)
        {
            // VEX encoded instruction: 2 or 3 byte prefix, then the opcode
            if (flags & CFvex3)
            {
                ggen.gen(0xC4);
                ggen.gen(cast(ubyte)VEX3_B1(c.Ivex));
                ggen.gen(cast(ubyte)VEX3_B2(c.Ivex));
                ggen.gen(c.Ivex.op);
            }
            else
            {
                ggen.gen(0xC5);
                ggen.gen(cast(ubyte)VEX2_B1(c.Ivex));
                ggen.gen(c.Ivex.op);
            }
            ins = vex_inssize(c);
            goto Lmodrm;
        }

        if (op > 0xFF)
        {
            // Multi-byte opcode: emit mandatory prefix (F2/F3/66) and REX in
            // the correct order, then the remaining opcode bytes
            if ((op & 0xFFFD00) == 0x0F3800)
                ins = inssize2[(op >> 8) & 0xFF];
            else if ((op & 0xFF00) == 0x0F00)
                ins = inssize2[op & 0xFF];

            if (op & 0xFF000000)
            {
                ubyte op1 = op >> 24;
                if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
                {
                    ggen.gen(op1);
                    if (c.Irex)
                        ggen.gen(c.Irex | REX);
                }
                else
                {
                    if (c.Irex)
                        ggen.gen(c.Irex | REX);
                    ggen.gen(op1);
                }
                ggen.gen((op >> 16) & 0xFF);
                ggen.gen((op >> 8) & 0xFF);
                ggen.gen(op & 0xFF);
            }
            else if (op & 0xFF0000)
            {
                ubyte op1 = cast(ubyte)(op >> 16);
                if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66)
                {
                    ggen.gen(op1);
                    if (c.Irex)
                        ggen.gen(c.Irex | REX);
                }
                else
                {
                    if (c.Irex)
                        ggen.gen(c.Irex | REX);
                    ggen.gen(op1);
                }
                ggen.gen((op >> 8) & 0xFF);
                ggen.gen(op & 0xFF);
            }
            else
            {
                if (c.Irex)
                    ggen.gen(c.Irex | REX);
                ggen.gen((op >> 8) & 0xFF);
                ggen.gen(op & 0xFF);
            }
        }
        else
        {
            if (c.Irex)
                ggen.gen(c.Irex | REX);
            ggen.gen(cast(ubyte)op);
        }
  Lmodrm:
        if (ins & M)            /* if modregrm byte             */
        {
            rm = c.Irm;
            ggen.gen(rm);

            // Look for an address size override when working with the
            // MOD R/M and SIB bytes

            if (is32bitaddr( I32, flags))
            {
                if (issib(rm))
                    ggen.gen(c.Isib);
                switch (rm & 0xC0)
                {
                    case 0x40:
                        do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1);     // 8 bit
                        break;

                    case 0:
                        if (!(issib(rm) && (c.Isib & 7) == 5 ||
                              (rm & 7) == 5))
                            break;
                        goto case 0x80;

                    case 0x80:
                    {
                        int cfflags = CFoff;
                        targ_size_t val = 0;
                        if (I64)
                        {
                            if ((rm & modregrm(3,0,7)) == modregrm(0,0,5))      // if disp32[RIP]
                            {
                                cfflags |= CFpc32;
                                val = -4;
                                reg_t reg = rm & modregrm(0,7,0);
                                // account for trailing immediate bytes between
                                // the disp32 and the end of the instruction
                                if (ins & T ||
                                    ((op == 0xF6 || op == 0xF7) && (reg == modregrm(0,0,0) || reg == modregrm(0,1,0))))
                                {   if (ins & E || op == 0xF6)
                                        val = -5;
                                    else if (c.Iflags & CFopsize)
                                        val = -6;
                                    else
                                        val = -8;
                                }
static if (TARGET_OSX || TARGET_WINDOS)
{
                                /* Mach-O and Win64 fixups already take the 4 byte size
                                 * into account, so bias by 4
                                 */
                                val += 4;
}
                            }
                        }
                        do32bit(&ggen, cast(FL)c.IFL1,&c.IEV1,cfflags,cast(int)val);
                        break;
                    }

                    default:
                        break;
                }
            }
            else
            {
                switch (rm & 0xC0)
                {   case 0x40:
                        do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1);     // 8 bit
                        break;

                    case 0:
                        if ((rm & 7) != 6)
                            break;
                        goto case 0x80;

                    case 0x80:
                        do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,CFoff);
                        break;

                    default:
                        break;
                }
            }
        }
        else
        {
            if (op == ENTER)
                do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,0);
        }
        flags &= CFseg | CFoff | CFselfrel;
        if (ins & T)                    /* if second operand            */
        {
            if (ins & E)                /* if data-8                    */
                do8bit(&ggen, cast(FL) c.IFL2,&c.IEV2);
            else if (!I16)
            {
                switch (op)
                {
                    case 0xC2:              /* RETN imm16           */
                    case 0xCA:              /* RETF imm16           */
                    do16:
                        do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
                        break;

                    case 0xA1:
                    case 0xA3:
                        if (I64 && c.Irex)
                        {
                    do64:
                            do64bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
                            break;
                        }
                        goto case 0xA0;

                    case 0xA0:              /* MOV AL,byte ptr []   */
                    case 0xA2:
                        if (c.Iflags & CFaddrsize && !I64)
                            goto do16;
                        else
                    do32:
                            do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
                        break;

                    case 0x9A:
                    case 0xEA:
                        if (c.Iflags & CFopsize)
                            goto ptr1616;
                        else
                            goto ptr1632;

                    case 0x68:              // PUSH immed32
                        if (cast(FL)c.IFL2 == FLblock)
                        {
                            c.IFL2 = FLblockoff;
                            goto do32;
                        }
                        else
                            goto case_default;

                    case CALL:              // CALL rel
                    case JMP:               // JMP  rel
                        flags |= CFselfrel;
                        goto case_default;

                    default:
                        if ((op|0xF) == 0x0F8F)     // Jcc rel16 rel32
                            flags |= CFselfrel;
                        if (I64 && (op & ~7) == 0xB8 && c.Irex & REX_W)
                            goto do64;
                    case_default:
                        if (c.Iflags & CFopsize)
                            goto do16;
                        else
                            goto do32;
                }
            }
            else
            {
                switch (op)
                {
                    case 0xC2:
                    case 0xCA:
                        goto do16;

                    case 0xA0:
                    case 0xA1:
                    case 0xA2:
                    case 0xA3:
                        if (c.Iflags & CFaddrsize)
                            goto do32;
                        else
                            goto do16;

                    case 0x9A:
                    case 0xEA:
                        if (c.Iflags & CFopsize)
                            goto ptr1632;
                        else
                            goto ptr1616;

                    ptr1616:
                    ptr1632:
                        //assert(c.IFL2 == FLfunc);
                        ggen.flush();
                        if (c.IFL2 == FLdatseg)
                        {
                            objmod.reftodatseg(seg,ggen.offset,c.IEV2.Vpointer,
                                    c.IEV2.Vseg,flags);
                            ggen.offset += 4;
                        }
                        else
                        {
                            s = c.IEV2.Vsym;
                            ggen.offset += objmod.reftoident(seg,ggen.offset,s,0,flags);
                        }
                        break;

                    case 0x68:              // PUSH immed16
                        if (cast(FL)c.IFL2 == FLblock)
                        {   c.IFL2 = FLblockoff;
                            goto do16;
                        }
                        else
                            goto case_default16;

                    case CALL:
                    case JMP:
                        flags |= CFselfrel;
                        goto default;

                    default:
                    case_default16:
                        if (c.Iflags & CFopsize)
                            goto do32;
                        else
                            goto do16;
                }
            }
        }
        else if (op == 0xF6)            /* TEST mem8,immed8             */
        {
            if ((rm & (7<<3)) == 0)
                do8bit(&ggen, cast(FL)c.IFL2,&c.IEV2);
        }
        else if (op == 0xF7)
        {
            if ((rm & (7<<3)) == 0)     /* TEST mem16/32,immed16/32     */
            {
                if ((I32 || I64) ^ ((c.Iflags & CFopsize) != 0))
                    do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
                else
                    do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags);
            }
        }

        debug
        if (ggen.getOffset() - startoffset != calccodsize(c))
        {
            printf("actual: %d, calc: %d\n", cast(int)(ggen.getOffset() - startoffset), cast(int)calccodsize(c));
            code_print(c);
            assert(0);
        }
    }
    ggen.flush();
    Offset(seg) = ggen.offset;
    //printf("-codout(), Coffset = x%x\n", Offset(seg));
    return cast(uint)ggen.offset;       /* ending address               */
}


/****************************
 * Emit an 8 byte (64 bit) operand, resolving the fixup kind `fl`.
 * Params:
 *      pbuf  = output buffer
 *      fl    = fixup kind of the operand (FLconst, FLdatseg, FLfunc, ...)
 *      uev   = operand value union
 *      flags = CFxxx relocation flags
 */
private void do64bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags)
{
    char *p;
    Symbol *s;
    targ_size_t ad;

    assert(I64);
    switch (fl)
    {
        case FLconst:
            ad = *cast(targ_size_t *) uev;
        L1:
            pbuf.genp(8,&ad);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,CFoffset64 | flags);
            break;

        case FLframehandler:
            framehandleroffset = pbuf.getOffset();
            ad = 0;
            goto L1;

        case FLswitch:
            pbuf.flush();
            ad = uev.Vswitch.Btableoffset;
            if (config.flags & CFGromable)
                objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
            else
                objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff);
            break;

        case FLcsdata:
        case FLfardata:
            //symbol_print(uev.Vsym);
            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from a
            // un-named external with is the start of .rodata or .data
        case FLextern:                      /* external data symbol         */
        case FLtlsdata:
static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
{
        case FLgot:
        case FLgotoff:
}
pbuf.flush();
            s = uev.Vsym;               /* symbol pointer               */
            objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags);
            break;

static if (TARGET_OSX)
{
        case FLgot:
            funcsym_p.Slocalgotoffset = pbuf.getOffset();
            ad = 0;
            goto L1;
}

        case FLfunc:                        /* function call                */
            s = uev.Vsym;                   /* symbol pointer               */
            assert(TARGET_SEGMENTED || !tyfarfunc(s.ty()));
            pbuf.flush();
            objmod.reftoident(pbuf.seg,pbuf.offset,s,0,CFoffset64 | flags);
            break;

        case FLblock:                       /* displacement to another block */
            ad = uev.Vblock.Boffset - pbuf.getOffset() - 4;
            //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad);
            goto L1;

        case FLblockoff:
            pbuf.flush();
            assert(uev.Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset);
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            break;

        default:
            WRFL(fl);
            assert(0);
    }
    pbuf.offset += 8;   // relocation cases above emit 8 bytes at flush time
}


/****************************
 * Emit a 4 byte (32 bit) operand, resolving the fixup kind `fl`.
 * Params:
 *      pbuf  = output buffer
 *      fl    = fixup kind of the operand
 *      uev   = operand value union
 *      flags = CFxxx relocation flags
 *      val   = bias applied to RIP-relative fixups (see CFpc32 handling)
 */
private void do32bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags, int val)
{
    char *p;
    Symbol *s;
    targ_size_t ad;

    //printf("do32bit(flags = x%x)\n", flags);
    switch (fl)
    {
        case FLconst:
            assert(targ_size_t.sizeof == 4 || targ_size_t.sizeof == 8);
            ad = * cast(targ_size_t *) uev;
        L1:
            pbuf.genp(4,&ad);
            return;

        case FLdatseg:
            pbuf.flush();
            objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags);
            break;

        case FLframehandler:
            framehandleroffset = pbuf.getOffset();
            ad = 0;
            goto L1;

        case FLswitch:
            pbuf.flush();
            ad = uev.Vswitch.Btableoffset;
            if (config.flags & CFGromable)
            {
static if (TARGET_OSX)
{
                // These are magic values based on the exact code generated for the switch jump
                if (I64)
                    uev.Vswitch.Btablebase = pbuf.getOffset() + 4;
                else
                    uev.Vswitch.Btablebase = pbuf.getOffset() + 4 - 8;
                ad -= uev.Vswitch.Btablebase;
                goto L1;
}
else static if (TARGET_WINDOS)
{
                if (I64)
                {
                    uev.Vswitch.Btablebase = pbuf.getOffset() + 4;
                    ad -= uev.Vswitch.Btablebase;
                    goto L1;
                }
                else
                    objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
}
else
{
                objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
}
            }
            else
                objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff);
            break;

        case FLcode:
            //assert(JMPJMPTABLE);      // the only use case
            pbuf.flush();
            ad = *cast(targ_size_t *) uev + pbuf.getOffset();
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad);
            break;

        case FLcsdata:
        case FLfardata:
            //symbol_print(uev.Vsym);

            // NOTE: In ELFOBJ all symbol refs have been tagged FLextern
            // strings and statics are treated like offsets from a
            // un-named external with is the start of .rodata or .data
        case FLextern:                      /* external data symbol         */
        case FLtlsdata:
static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
{
        case FLgot:
        case FLgotoff:
}
            pbuf.flush();
            s = uev.Vsym;               /* symbol pointer               */
            if (TARGET_WINDOS && I64 && (flags & CFpc32))
            {
                /* This is for those funky fixups where the location to be fixed up
                 * is a 'val' amount back from the current RIP, biased by adding 4.
                 */
                assert(val >= -5 && val <= 0);
                flags |= (-val & 7) << 24;          // set CFREL value
                assert(CFREL == (7 << 24));
                objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags);
            }
            else
                objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags);
            break;

static if (TARGET_OSX)
{
        case FLgot:
            funcsym_p.Slocalgotoffset = pbuf.getOffset();
            ad = 0;
            goto L1;
}

        case FLfunc:                        /* function call                */
            s = uev.Vsym;                   /* symbol pointer               */
            if (tyfarfunc(s.ty()))
            {   /* Large code references are always absolute    */
                pbuf.flush();
                pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 4;
            }
            else if (s.Sseg == pbuf.seg &&
                     (s.Sclass == SCstatic || s.Sclass == SCglobal) &&
                     s.Sxtrnnum == 0 && flags & CFselfrel)
            {   /* if we know it's relative address             */
                ad = s.Soffset - pbuf.getOffset() - 4;
                goto L1;
            }
            else
            {
                assert(TARGET_SEGMENTED || !tyfarfunc(s.ty()));
                pbuf.flush();
                objmod.reftoident(pbuf.seg,pbuf.offset,s,val,flags);
            }
            break;

        case FLblock:                       /* displacement to another block */
            ad = uev.Vblock.Boffset - pbuf.getOffset() - 4;
            //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad);
            goto L1;

        case FLblockoff:
            pbuf.flush();
            assert(uev.Vblock);
            //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset);
            objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset);
            break;

        default:
            WRFL(fl);
            assert(0);
    }
    pbuf.offset += 4;   // relocation cases above emit 4 bytes at flush time
}


/****************************
 * Emit a 2 byte (16 bit) operand, resolving the fixup kind `fl`.
 * Params:
 *      pbuf  = output buffer
 *      fl    = fixup kind of the operand
 *      uev   = operand value union
 *      flags = CFxxx relocation flags
 */
private void do16bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags)
{
    char *p;
    Symbol *s;
    targ_size_t ad;

    switch (fl)
    {
        case FLconst:
            pbuf.genp(2,cast(char *) uev);
            return;

        case
FLdatseg: 7532 pbuf.flush(); 7533 objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags); 7534 break; 7535 7536 case FLswitch: 7537 pbuf.flush(); 7538 ad = uev.Vswitch.Btableoffset; 7539 if (config.flags & CFGromable) 7540 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7541 else 7542 objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff); 7543 break; 7544 7545 case FLcsdata: 7546 case FLfardata: 7547 case FLextern: /* external data symbol */ 7548 case FLtlsdata: 7549 //assert(SIXTEENBIT || TARGET_SEGMENTED); 7550 pbuf.flush(); 7551 s = uev.Vsym; /* symbol pointer */ 7552 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags); 7553 break; 7554 7555 case FLfunc: /* function call */ 7556 //assert(SIXTEENBIT || TARGET_SEGMENTED); 7557 s = uev.Vsym; /* symbol pointer */ 7558 if (tyfarfunc(s.ty())) 7559 { /* Large code references are always absolute */ 7560 pbuf.flush(); 7561 pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 2; 7562 } 7563 else if (s.Sseg == pbuf.seg && 7564 (s.Sclass == SCstatic || s.Sclass == SCglobal) && 7565 s.Sxtrnnum == 0 && flags & CFselfrel) 7566 { /* if we know it's relative address */ 7567 ad = s.Soffset - pbuf.getOffset() - 2; 7568 goto L1; 7569 } 7570 else 7571 { 7572 pbuf.flush(); 7573 objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags); 7574 } 7575 break; 7576 7577 case FLblock: /* displacement to another block */ 7578 ad = uev.Vblock.Boffset - pbuf.getOffset() - 2; 7579 debug 7580 { 7581 targ_ptrdiff_t delta = uev.Vblock.Boffset - pbuf.getOffset() - 2; 7582 assert(cast(short)delta == delta); 7583 } 7584 L1: 7585 pbuf.genp(2,&ad); // displacement 7586 return; 7587 7588 case FLblockoff: 7589 pbuf.flush(); 7590 objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset); 7591 break; 7592 7593 default: 7594 WRFL(fl); 7595 assert(0); 7596 } 7597 pbuf.offset += 2; 7598 } 7599 7600 7601 private void do8bit(MiniCodeBuf *pbuf, FL fl, evc *uev) 7602 { 7603 char c; 7604 targ_ptrdiff_t 
    delta;

    switch (fl)
    {
        case FLconst:
            c = cast(char)uev.Vuns;
            break;

        case FLblock:
            // self-relative displacement, minus the 1 byte the field occupies
            delta = uev.Vblock.Boffset - pbuf.getOffset() - 1;
            if (cast(byte)delta != delta)
            {
                // out of range for a signed byte: report and abort compilation
                version (MARS)
                {
                    if (uev.Vblock.Bsrcpos.Slinnum)
                        printf("%s(%d): ", uev.Vblock.Bsrcpos.Sfilename, uev.Vblock.Bsrcpos.Slinnum);
                }
                printf("block displacement of %lld exceeds the maximum offset of -128 to 127.\n", cast(long)delta);
                err_exit();
            }
            c = cast(char)delta;
            // NOTE(review): backward branches must not have displacement 0x7F here —
            // presumably reserved by the jump-shortening pass; confirm against that code
            debug assert(uev.Vblock.Boffset > pbuf.getOffset() || c != 0x7F);
            break;

        default:
            debug printf("fl = %d\n",fl);
            assert(0);
    }
    pbuf.gen(c);
}


/**********************************
 */

version (SCPP)
{

static if (HYDRATE)
{
/**********************************
 * Rebuild (hydrate) the pointers in a code list loaded from a precompiled
 * header, walking the list via the next links.
 * Params:
 *      pc = pointer to the head link of the code list; hydrated in place
 */
void code_hydrate(code **pc)
{
    code *c;
    ubyte ins,rm;
    FL fl;

    assert(pc);
    while (*pc)
    {
        c = cast(code *) ph_hydrate(cast(void**)pc);
        // pick instruction-table entry so we know which operand fields exist
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((c.Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c.Iop >> 8) & 0xFF];
        else if ((c.Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c.Iop & 0xFF];
        else
            ins = inssize[c.Iop & 0xFF];
        switch (c.Iop)
        {
            default:
                break;

            case ESCAPE | ESClinnum:
                srcpos_hydrate(&c.IEV1.Vsrcpos);
                goto done;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_hydrate(&c.IEV1.Vtor);
                goto done;

            case ASM:
                ph_hydrate(cast(void**)&c.IEV1.bytes);
                goto done;
        }
        if (!(ins & M) ||
            ((rm = c.Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c.Iflags))
        {
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand          */
        }
        else
        {
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand          */
        }
        fl = cast(FL) c.IFL1;
        switch (fl)
        {
            // kinds that carry a Symbol* — hydrate the symbol reference
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_hydrate(&c.IEV1.Vsym);
                symbol_debug(c.IEV1.Vsym);
                break;

            // kinds with no pointer payload — nothing to hydrate
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_hydrate(cast(void**)&c.IEV1.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_hydrate(cast(void**)&c.IEV1.Vblock);
                break;

            version (SCPP)
            {
            case FLctor:
            case FLdtor:
                el_hydrate(cast(elem**)&c.IEV1.Vtor);
                break;
            }

            case FLasm:
                ph_hydrate(cast(void**)&c.IEV1.bytes);
                break;

            default:
                WRFL(fl);
                assert(0);
        }
    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T))
            goto done;          /* if no second operand */

        fl = cast(FL) c.IFL2;
        switch (fl)
        {
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_hydrate(&c.IEV2.Vsym);
                symbol_debug(c.IEV2.Vsym);
                break;

            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_hydrate(cast(void**)&c.IEV2.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_hydrate(cast(void**)&c.IEV2.Vblock);
                break;

            default:
                WRFL(fl);
                assert(0);
        }
    done:
        { }

        pc = &c.next;
    }
}
}

/**********************************
 */

static if (DEHYDRATE)
{
/**********************************
 * Convert (dehydrate) the pointers in a code list into their persistent
 * form for writing a precompiled header; structural inverse of code_hydrate.
 * Params:
 *      pc = pointer to the head link of the code list; dehydrated in place
 */
void code_dehydrate(code **pc)
{
    code *c;
    ubyte ins,rm;
    FL fl;

    while ((c = *pc) != null)
    {
        ph_dehydrate(pc);

        // pick instruction-table entry so we know which operand fields exist
        if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4)
            ins = vex_inssize(c);
        else if ((c.Iop & 0xFFFD00) == 0x0F3800)
            ins = inssize2[(c.Iop >> 8) & 0xFF];
        else if ((c.Iop & 0xFF00) == 0x0F00)
            ins = inssize2[c.Iop & 0xFF];
        else
            ins = inssize[c.Iop & 0xFF];
        switch (c.Iop)
        {
            default:
                break;

            case ESCAPE | ESClinnum:
                srcpos_dehydrate(&c.IEV1.Vsrcpos);
                goto done;

            case ESCAPE | ESCctor:
            case ESCAPE | ESCdtor:
                el_dehydrate(&c.IEV1.Vtor);
                goto done;

            case ASM:
                ph_dehydrate(&c.IEV1.bytes);
                goto done;
        }

        if (!(ins & M) ||
            ((rm = c.Irm) & 0xC0) == 0xC0)
            goto do2;           /* if no first operand          */
        if (is32bitaddr(I32,c.Iflags))
        {
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5))
               )
                goto do2;       /* if no first operand          */
        }
        else
        {
            if (
                ((rm & 0xC0) == 0 && !((rm & 7) == 6))
               )
                goto do2;       /* if no first operand          */
        }
        fl = cast(FL) c.IFL1;
        switch (fl)
        {
            // kinds that carry a Symbol* — dehydrate the symbol reference
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_dehydrate(&c.IEV1.Vsym);
                break;

            // kinds with no pointer payload — nothing to dehydrate
            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_dehydrate(&c.IEV1.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_dehydrate(&c.IEV1.Vblock);
                break;

            version (SCPP)
            {
            case FLctor:
            case FLdtor:
                el_dehydrate(&c.IEV1.Vtor);
                break;
            }

            case FLasm:
                ph_dehydrate(&c.IEV1.bytes);
                break;

            default:
                WRFL(fl);
                assert(0);
                break;
        }
    do2:
        /* Ignore TEST (F6 and F7) opcodes      */
        if (!(ins & T))
            goto done;          /* if no second operand */

        fl = cast(FL) c.IFL2;
        switch (fl)
        {
            case FLudata:
            case FLdata:
            case FLreg:
            case FLauto:
            case FLfast:
            case FLbprel:
            case FLpara:
            case FLcsdata:
            case FLfardata:
            case FLtlsdata:
            case FLfunc:
            case FLpseudo:
            case FLextern:
                assert(flinsymtab[fl]);
                symbol_dehydrate(&c.IEV2.Vsym);
                break;

            case FLdatseg:
            case FLfltreg:
            case FLallocatmp:
            case FLcs:
            case FLndp:
            case FLoffset:
            case FLlocalsize:
            case FLconst:
            case FLframehandler:
                assert(!flinsymtab[fl]);
                break;

            case FLcode:
                ph_dehydrate(&c.IEV2.Vcode);
                break;

            case FLblock:
            case FLblockoff:
                ph_dehydrate(&c.IEV2.Vblock);
                break;

            default:
                WRFL(fl);
                assert(0);
                break;
        }
    done:
        pc = &code_next(c);
    }
}
}
}

/***************************
 * Debug code to dump code structure.
7988 */ 7989 7990 void WRcodlst(code *c) 7991 { 7992 for (; c; c = code_next(c)) 7993 code_print(c); 7994 } 7995 7996 extern (C) void code_print(code* c) 7997 { 7998 ubyte ins; 7999 ubyte rexb; 8000 8001 if (c == null) 8002 { 8003 printf("code 0\n"); 8004 return; 8005 } 8006 8007 const op = c.Iop; 8008 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 8009 ins = vex_inssize(c); 8010 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 8011 ins = inssize2[(op >> 8) & 0xFF]; 8012 else if ((c.Iop & 0xFF00) == 0x0F00) 8013 ins = inssize2[op & 0xFF]; 8014 else 8015 ins = inssize[op & 0xFF]; 8016 8017 printf("code %p: nxt=%p ",c,code_next(c)); 8018 8019 if (c.Iflags & CFvex) 8020 { 8021 if (c.Iflags & CFvex3) 8022 { 8023 printf("vex=0xC4"); 8024 printf(" 0x%02X", VEX3_B1(c.Ivex)); 8025 printf(" 0x%02X", VEX3_B2(c.Ivex)); 8026 rexb = 8027 ( c.Ivex.w ? REX_W : 0) | 8028 (!c.Ivex.r ? REX_R : 0) | 8029 (!c.Ivex.x ? REX_X : 0) | 8030 (!c.Ivex.b ? REX_B : 0); 8031 } 8032 else 8033 { 8034 printf("vex=0xC5"); 8035 printf(" 0x%02X", VEX2_B1(c.Ivex)); 8036 rexb = !c.Ivex.r ? 
REX_R : 0; 8037 } 8038 printf(" "); 8039 } 8040 else 8041 rexb = c.Irex; 8042 8043 if (rexb) 8044 { 8045 printf("rex=0x%02X ", c.Irex); 8046 if (rexb & REX_W) 8047 printf("W"); 8048 if (rexb & REX_R) 8049 printf("R"); 8050 if (rexb & REX_X) 8051 printf("X"); 8052 if (rexb & REX_B) 8053 printf("B"); 8054 printf(" "); 8055 } 8056 printf("op=0x%02X",op); 8057 8058 if ((op & 0xFF) == ESCAPE) 8059 { 8060 if ((op & 0xFF00) == ESClinnum) 8061 { 8062 printf(" linnum = %d\n",c.IEV1.Vsrcpos.Slinnum); 8063 return; 8064 } 8065 printf(" ESCAPE %d",c.Iop >> 8); 8066 } 8067 if (c.Iflags) 8068 printf(" flg=%x",c.Iflags); 8069 if (ins & M) 8070 { 8071 uint rm = c.Irm; 8072 printf(" rm=0x%02X=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7); 8073 if (!I16 && issib(rm)) 8074 { 8075 ubyte sib = c.Isib; 8076 printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7); 8077 } 8078 if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40) 8079 { 8080 switch (c.IFL1) 8081 { 8082 case FLconst: 8083 case FLoffset: 8084 printf(" int = %4d",c.IEV1.Vuns); 8085 break; 8086 8087 case FLblock: 8088 printf(" block = %p",c.IEV1.Vblock); 8089 break; 8090 8091 case FLswitch: 8092 case FLblockoff: 8093 case FLlocalsize: 8094 case FLframehandler: 8095 case 0: 8096 break; 8097 8098 case FLdatseg: 8099 printf(" FLdatseg %d.%llx",c.IEV1.Vseg,cast(ulong)c.IEV1.Vpointer); 8100 break; 8101 8102 case FLauto: 8103 case FLfast: 8104 case FLreg: 8105 case FLdata: 8106 case FLudata: 8107 case FLpara: 8108 case FLbprel: 8109 case FLtlsdata: 8110 case FLextern: 8111 printf(" "); 8112 WRFL(cast(FL)c.IFL1); 8113 printf(" sym='%s'",c.IEV1.Vsym.Sident.ptr); 8114 if (c.IEV1.Voffset) 8115 printf(".%d", cast(int)c.IEV1.Voffset); 8116 break; 8117 8118 default: 8119 WRFL(cast(FL)c.IFL1); 8120 break; 8121 } 8122 } 8123 } 8124 if (ins & T) 8125 { 8126 printf(" "); 8127 WRFL(cast(FL)c.IFL2); 8128 switch (c.IFL2) 8129 { 8130 case FLconst: 8131 printf(" int = %4d",c.IEV2.Vuns); 8132 break; 8133 8134 case FLblock: 8135 
printf(" block = %p",c.IEV2.Vblock); 8136 break; 8137 8138 case FLswitch: 8139 case FLblockoff: 8140 case 0: 8141 case FLlocalsize: 8142 case FLframehandler: 8143 break; 8144 8145 case FLdatseg: 8146 printf(" %d.%llx",c.IEV2.Vseg,cast(ulong)c.IEV2.Vpointer); 8147 break; 8148 8149 case FLauto: 8150 case FLfast: 8151 case FLreg: 8152 case FLpara: 8153 case FLbprel: 8154 case FLfunc: 8155 case FLdata: 8156 case FLudata: 8157 case FLtlsdata: 8158 printf(" sym='%s'",c.IEV2.Vsym.Sident.ptr); 8159 break; 8160 8161 case FLcode: 8162 printf(" code = %p",c.IEV2.Vcode); 8163 break; 8164 8165 default: 8166 WRFL(cast(FL)c.IFL2); 8167 break; 8168 } 8169 } 8170 printf("\n"); 8171 } 8172 8173 }