/**
 * Compiler implementation of the
 * $(LINK2 http://www.dlang.org, D programming language).
 *
 * Copyright:   Copyright (C) 1994-1998 by Symantec
 *              Copyright (C) 2000-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/backend/cod3.d, backend/cod3.d)
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/backend/cod3.d
 */

module dmd.backend.cod3;

version (SCPP)
    version = COMPILE;
version (MARS)
    version = COMPILE;

version (COMPILE)
{

import core.stdc.stdio;
import core.stdc.stdlib;
import core.stdc.string;

import dmd.backend.backend;
import dmd.backend.cc;
import dmd.backend.cdef;
import dmd.backend.cgcse;
import dmd.backend.code;
import dmd.backend.code_x86;
import dmd.backend.codebuilder;
import dmd.backend.dlist;
import dmd.backend.dvec;
import dmd.backend.melf;
import dmd.backend.mem;
import dmd.backend.el;
import dmd.backend.exh;
import dmd.backend.global;
import dmd.backend.obj;
import dmd.backend.oper;
import dmd.backend.outbuf;
import dmd.backend.rtlsym;
import dmd.backend.ty;
import dmd.backend.type;
import dmd.backend.xmm;

version (SCPP)
{
    import parser;
    import precomp;
}

extern (C++):

nothrow:

version (MARS)
    enum MARS = true;
else
    enum MARS = false;

int REGSIZE();

extern __gshared CGstate cgstate;
extern __gshared ubyte[FLMAX] segfl;
extern __gshared bool[FLMAX] stackfl, flinsymtab;

private extern (D) uint mask(uint m) { return 1 << m; }

//private void genorreg(ref CodeBuilder c, uint t, uint f) { genregs(c, 0x09, f, t); }

extern __gshared targ_size_t retsize;

enum JMPJMPTABLE = false;           // benchmarking shows it's slower

enum MINLL = 0x8000_0000_0000_0000L;
enum MAXLL = 0x7FFF_FFFF_FFFF_FFFFL;

/*************
 * Size in bytes of each instruction.
 * 0 means illegal instruction.
84 * bit M: if there is a modregrm field (EV1 is reserved for modregrm) 85 * bit T: if there is a second operand (EV2) 86 * bit E: if second operand is only 8 bits 87 * bit A: a short version exists for the AX reg 88 * bit R: a short version exists for regs 89 * bits 2..0: size of instruction (excluding optional bytes) 90 */ 91 92 enum 93 { 94 M = 0x80, 95 T = 0x40, 96 E = 0x20, 97 A = 0x10, 98 R = 0x08, 99 W = 0, 100 } 101 102 private __gshared ubyte[256] inssize = 103 [ M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 00 */ 104 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 08 */ 105 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 10 */ 106 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 18 */ 107 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 20 */ 108 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 28 */ 109 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 30 */ 110 M|2,M|2,M|2,M|2, T|E|2,T|3,1,1, /* 38 */ 111 1,1,1,1, 1,1,1,1, /* 40 */ 112 1,1,1,1, 1,1,1,1, /* 48 */ 113 1,1,1,1, 1,1,1,1, /* 50 */ 114 1,1,1,1, 1,1,1,1, /* 58 */ 115 1,1,M|2,M|2, 1,1,1,1, /* 60 */ 116 T|3,M|T|4,T|E|2,M|T|E|3, 1,1,1,1, /* 68 */ 117 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 70 */ 118 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* 78 */ 119 M|T|E|A|3,M|T|A|4,M|T|E|3,M|T|E|3, M|2,M|2,M|2,M|A|R|2, /* 80 */ 120 M|A|2,M|A|2,M|A|2,M|A|2, M|2,M|2,M|2,M|R|2, /* 88 */ 121 1,1,1,1, 1,1,1,1, /* 90 */ 122 1,1,T|5,1, 1,1,1,1, /* 98 */ 123 124 // cod3_set32() patches this 125 // T|5,T|5,T|5,T|5, 1,1,1,1, /* A0 */ 126 T|3,T|3,T|3,T|3, 1,1,1,1, /* A0 */ 127 128 T|E|2,T|3,1,1, 1,1,1,1, /* A8 */ 129 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* B0 */ 130 T|3,T|3,T|3,T|3, T|3,T|3,T|3,T|3, /* B8 */ 131 M|T|E|3,M|T|E|3,T|3,1, M|2,M|2,M|T|E|R|3,M|T|R|4, /* C0 */ 132 T|E|4,1,T|3,1, 1,T|E|2,1,1, /* C8 */ 133 M|2,M|2,M|2,M|2, T|E|2,T|E|2,0,1, /* D0 */ 134 /* For the floating instructions, allow room for the FWAIT */ 135 M|2,M|2,M|2,M|2, M|2,M|2,M|2,M|2, /* D8 */ 136 T|E|2,T|E|2,T|E|2,T|E|2, T|E|2,T|E|2,T|E|2,T|E|2, /* E0 */ 137 T|3,T|3,T|5,T|E|2, 1,1,1,1, /* E8 */ 138 1,0,1,1, 1,1,M|A|2,M|A|2, /* F0 */ 139 1,1,1,1, 1,1,M|2,M|R|2 /* F8 */ 140 ]; 141 142 private __gshared const ubyte[256] inssize32 = 143 [ 2,2,2,2, 2,5,1,1, /* 00 */ 144 2,2,2,2, 2,5,1,1, /* 08 */ 145 2,2,2,2, 2,5,1,1, /* 10 */ 146 2,2,2,2, 2,5,1,1, /* 18 */ 147 2,2,2,2, 2,5,1,1, /* 20 */ 148 2,2,2,2, 2,5,1,1, /* 28 */ 149 2,2,2,2, 2,5,1,1, /* 30 */ 150 2,2,2,2, 2,5,1,1, /* 38 */ 151 1,1,1,1, 1,1,1,1, /* 40 */ 152 1,1,1,1, 1,1,1,1, /* 48 */ 153 1,1,1,1, 1,1,1,1, /* 50 */ 154 1,1,1,1, 1,1,1,1, /* 58 */ 155 1,1,2,2, 1,1,1,1, /* 60 */ 156 5,6,2,3, 1,1,1,1, /* 68 */ 157 2,2,2,2, 2,2,2,2, /* 70 */ 158 2,2,2,2, 2,2,2,2, /* 78 */ 159 3,6,3,3, 2,2,2,2, /* 80 */ 160 2,2,2,2, 2,2,2,2, /* 88 */ 161 1,1,1,1, 1,1,1,1, /* 90 */ 162 1,1,7,1, 1,1,1,1, /* 98 */ 163 5,5,5,5, 1,1,1,1, /* A0 */ 164 2,5,1,1, 1,1,1,1, /* A8 */ 165 2,2,2,2, 2,2,2,2, /* B0 */ 166 5,5,5,5, 5,5,5,5, /* B8 */ 167 3,3,3,1, 2,2,3,6, /* C0 */ 168 4,1,3,1, 1,2,1,1, /* C8 */ 169 2,2,2,2, 2,2,0,1, /* D0 */ 170 /* For the floating instructions, don't need room for the FWAIT */ 171 2,2,2,2, 2,2,2,2, /* D8 */ 172 173 2,2,2,2, 2,2,2,2, /* E0 */ 174 5,5,7,2, 1,1,1,1, /* E8 */ 175 1,0,1,1, 1,1,2,2, /* F0 */ 176 1,1,1,1, 1,1,2,2 /* F8 */ 177 ]; 178 179 /* For 2 byte opcodes starting with 0x0F */ 180 private __gshared ubyte[256] inssize2 = 181 [ M|3,M|3,M|3,M|3, 2,2,2,2, // 00 182 2,2,M|3,2, 2,M|3,2,M|T|E|4, // 08 183 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 10 184 M|3,2,2,2, 2,2,2,2, // 18 185 M|3,M|3,M|3,M|3, M|3,2,M|3,2, // 20 186 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 28 187 2,2,2,2, 
2,2,2,2, // 30 188 M|4,2,M|T|E|5,2, 2,2,2,2, // 38 189 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 40 190 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 48 191 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 50 192 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 58 193 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 60 194 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 68 195 M|T|E|4,M|T|E|4,M|T|E|4,M|T|E|4, M|3,M|3,M|3,2, // 70 196 2,2,2,2, M|3,M|3,M|3,M|3, // 78 197 W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 80 198 W|T|4,W|T|4,W|T|4,W|T|4, W|T|4,W|T|4,W|T|4,W|T|4, // 88 199 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 90 200 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // 98 201 2,2,2,M|3, M|T|E|4,M|3,2,2, // A0 202 2,2,2,M|3, M|T|E|4,M|3,M|3,M|3, // A8 203 M|E|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // B0 204 M|3,2,M|T|E|4,M|3, M|3,M|3,M|3,M|3, // B8 205 M|3,M|3,M|T|E|4,M|3, M|T|E|4,M|T|E|4,M|T|E|4,M|3, // C0 206 2,2,2,2, 2,2,2,2, // C8 207 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D0 208 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // D8 209 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E0 210 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // E8 211 M|3,M|3,M|3,M|3, M|3,M|3,M|3,M|3, // F0 212 M|3,M|3,M|3,M|3, M|3,M|3,M|3,2 // F8 213 ]; 214 215 /************************************************* 216 * Generate code to save `reg` in `regsave` stack area. 217 * Params: 218 * regsave = register save areay on stack 219 * cdb = where to write generated code 220 * reg = register to save 221 * idx = set to location in regsave for use in REGSAVE_restore() 222 */ 223 224 void REGSAVE_save(ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, out uint idx) 225 { 226 if (isXMMreg(reg)) 227 { 228 regsave.alignment = 16; 229 regsave.idx = (regsave.idx + 15) & ~15; 230 idx = regsave.idx; 231 regsave.idx += 16; 232 // MOVD idx[RBP],xmm 233 opcode_t op = STOAPD; 234 if (TARGET_LINUX && I32) 235 // Haven't yet figured out why stack is not aligned to 16 236 op = STOUPD; 237 cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLregsave,cast(targ_uns) idx); 238 } 239 else 240 { 241 if (!regsave.alignment) 242 regsave.alignment = REGSIZE; 243 idx = regsave.idx; 244 regsave.idx += REGSIZE; 245 // MOV idx[RBP],reg 246 cdb.genc1(0x89,modregxrm(2, reg, BPRM),FLregsave,cast(targ_uns) idx); 247 if (I64) 248 code_orrex(cdb.last(), REX_W); 249 } 250 reflocal = true; 251 if (regsave.idx > regsave.top) 252 regsave.top = regsave.idx; // keep high water mark 253 } 254 255 /******************************* 256 * Restore `reg` from `regsave` area. 257 * Complement REGSAVE_save(). 258 */ 259 260 void REGSAVE_restore(const ref REGSAVE regsave, ref CodeBuilder cdb, reg_t reg, uint idx) 261 { 262 if (isXMMreg(reg)) 263 { 264 assert(regsave.alignment == 16); 265 // MOVD xmm,idx[RBP] 266 opcode_t op = LODAPD; 267 if (TARGET_LINUX && I32) 268 // Haven't yet figured out why stack is not aligned to 16 269 op = LODUPD; 270 cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLregsave,cast(targ_uns) idx); 271 } 272 else 273 { // MOV reg,idx[RBP] 274 cdb.genc1(0x8B,modregxrm(2, reg, BPRM),FLregsave,cast(targ_uns) idx); 275 if (I64) 276 code_orrex(cdb.last(), REX_W); 277 } 278 } 279 280 /************************************ 281 * Size for vex encoded instruction. 
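 *
 * A rough size check against the inssize2[] table above (reasoning from the
 * table, not the Intel manuals): the 2-byte VEX prefix C5 xx replaces the
 * single 0F escape byte already counted in inssize2[], giving
 * inssize2[c.Ivex.op] + 1, and the 3-byte form C4 xx xx costs one byte more.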
282 */ 283 284 ubyte vex_inssize(code *c) 285 { 286 assert(c.Iflags & CFvex && c.Ivex.pfx == 0xC4); 287 ubyte ins; 288 if (c.Iflags & CFvex3) 289 { 290 switch (c.Ivex.mmmm) 291 { 292 case 0: // no prefix 293 case 1: // 0F 294 ins = cast(ubyte)(inssize2[c.Ivex.op] + 2); 295 break; 296 case 2: // 0F 38 297 ins = cast(ubyte)(inssize2[0x38] + 1); 298 break; 299 case 3: // 0F 3A 300 ins = cast(ubyte)(inssize2[0x3A] + 1); 301 break; 302 default: 303 printf("Iop = %x mmmm = %x\n", c.Iop, c.Ivex.mmmm); 304 assert(0); 305 } 306 } 307 else 308 { 309 ins = cast(ubyte)(inssize2[c.Ivex.op] + 1); 310 } 311 return ins; 312 } 313 314 /************************************ 315 * Determine if there is a modregrm byte for code. 316 */ 317 318 int cod3_EA(code *c) 319 { uint ins; 320 321 opcode_t op1 = c.Iop & 0xFF; 322 if (op1 == ESCAPE) 323 ins = 0; 324 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 325 ins = inssize2[(c.Iop >> 8) & 0xFF]; 326 else if ((c.Iop & 0xFF00) == 0x0F00) 327 ins = inssize2[op1]; 328 else 329 ins = inssize[op1]; 330 return ins & M; 331 } 332 333 /******************************** 334 * setup ALLREGS and BYTEREGS 335 * called by: codgen 336 */ 337 338 void cod3_initregs() 339 { 340 if (I64) 341 { 342 ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15; 343 BYTEREGS = ALLREGS; 344 } 345 else 346 { 347 ALLREGS = ALLREGS_INIT; 348 BYTEREGS = BYTEREGS_INIT; 349 } 350 } 351 352 /******************************** 353 * set initial global variable values 354 */ 355 356 void cod3_setdefault() 357 { 358 fregsaved = mBP | mSI | mDI; 359 } 360 361 /******************************** 362 * Fix global variables for 386. 363 */ 364 365 void cod3_set32() 366 { 367 inssize[0xA0] = T|5; 368 inssize[0xA1] = T|5; 369 inssize[0xA2] = T|5; 370 inssize[0xA3] = T|5; 371 BPRM = 5; /* [EBP] addressing mode */ 372 fregsaved = mBP | mBX | mSI | mDI; // saved across function calls 373 FLOATREGS = FLOATREGS_32; 374 FLOATREGS2 = FLOATREGS2_32; 375 DOUBLEREGS = DOUBLEREGS_32; 376 if (config.flags3 & CFG3eseqds) 377 fregsaved |= mES; 378 379 foreach (ref v; inssize2[0x80 .. 0x90]) 380 v = W|T|6; 381 382 TARGET_STACKALIGN = config.fpxmmregs ? 16 : 4; 383 } 384 385 /******************************** 386 * Fix global variables for I64. 387 */ 388 389 void cod3_set64() 390 { 391 inssize[0xA0] = T|5; // MOV AL,mem 392 inssize[0xA1] = T|5; // MOV RAX,mem 393 inssize[0xA2] = T|5; // MOV mem,AL 394 inssize[0xA3] = T|5; // MOV mem,RAX 395 BPRM = 5; // [RBP] addressing mode 396 397 static if (TARGET_WINDOS) 398 { 399 fregsaved = mBP | mBX | mDI | mSI | mR12 | mR13 | mR14 | mR15 | mES | mXMM6 | mXMM7; // also XMM8..15; 400 } 401 else 402 { 403 fregsaved = mBP | mBX | mR12 | mR13 | mR14 | mR15 | mES; // saved across function calls 404 } 405 FLOATREGS = FLOATREGS_64; 406 FLOATREGS2 = FLOATREGS2_64; 407 DOUBLEREGS = DOUBLEREGS_64; 408 409 ALLREGS = mAX|mBX|mCX|mDX|mSI|mDI| mR8|mR9|mR10|mR11|mR12|mR13|mR14|mR15; 410 BYTEREGS = ALLREGS; 411 412 foreach (ref v; inssize2[0x80 .. 0x90]) 413 v = W|T|6; 414 415 TARGET_STACKALIGN = config.fpxmmregs ? 16 : 8; 416 } 417 418 /********************************* 419 * Word or dword align start of function. 
420 * Params: 421 * seg = segment to write alignment bytes to 422 * nbytes = number of alignment bytes to write 423 */ 424 void cod3_align_bytes(int seg, size_t nbytes) 425 { 426 /* Table 4-2 from Intel Instruction Set Reference M-Z 427 * 1 bytes NOP 90 428 * 2 bytes 66 NOP 66 90 429 * 3 bytes NOP DWORD ptr [EAX] 0F 1F 00 430 * 4 bytes NOP DWORD ptr [EAX + 00H] 0F 1F 40 00 431 * 5 bytes NOP DWORD ptr [EAX + EAX*1 + 00H] 0F 1F 44 00 00 432 * 6 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00H] 66 0F 1F 44 00 00 433 * 7 bytes NOP DWORD ptr [EAX + 00000000H] 0F 1F 80 00 00 00 00 434 * 8 bytes NOP DWORD ptr [EAX + EAX*1 + 00000000H] 0F 1F 84 00 00 00 00 00 435 * 9 bytes 66 NOP DWORD ptr [EAX + EAX*1 + 00000000H] 66 0F 1F 84 00 00 00 00 00 436 * only for CPUs: CPUID.01H.EAX[Bytes 11:8] = 0110B or 1111B 437 */ 438 439 assert(SegData[seg].SDseg == seg); 440 441 while (nbytes) 442 { size_t n = nbytes; 443 const(char)* p; 444 445 if (nbytes > 1 && (I64 || config.fpxmmregs)) 446 { 447 switch (n) 448 { 449 case 2: p = "\x66\x90"; break; 450 case 3: p = "\x0F\x1F\x00"; break; 451 case 4: p = "\x0F\x1F\x40\x00"; break; 452 case 5: p = "\x0F\x1F\x44\x00\x00"; break; 453 case 6: p = "\x66\x0F\x1F\x44\x00\x00"; break; 454 case 7: p = "\x0F\x1F\x80\x00\x00\x00\x00"; break; 455 case 8: p = "\x0F\x1F\x84\x00\x00\x00\x00\x00"; break; 456 default: p = "\x66\x0F\x1F\x84\x00\x00\x00\x00\x00"; n = 9; break; 457 } 458 } 459 else 460 { 461 static immutable ubyte[15] nops = [ 462 0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90,0x90 463 ]; // XCHG AX,AX 464 if (n > nops.length) 465 n = nops.length; 466 p = cast(char*)nops; 467 } 468 objmod.write_bytes(SegData[seg],cast(uint)n,cast(char*)p); 469 nbytes -= n; 470 } 471 } 472 473 /**************************** 474 * Align start of function. 475 * Params: 476 * seg = segment of function 477 */ 478 void cod3_align(int seg) 479 { 480 uint nbytes; 481 static if (TARGET_WINDOS) 482 { 483 if (config.flags4 & CFG4speed) // if optimized for speed 484 { 485 // Pick alignment based on CPU target 486 if (config.target_cpu == TARGET_80486 || 487 config.target_cpu >= TARGET_PentiumPro) 488 { // 486 does reads on 16 byte boundaries, so if we are near 489 // such a boundary, align us to it 490 491 nbytes = -Offset(seg) & 15; 492 if (nbytes < 8) 493 cod3_align_bytes(seg, nbytes); 494 } 495 } 496 } 497 else 498 { 499 nbytes = -Offset(seg) & 7; 500 cod3_align_bytes(seg, nbytes); 501 } 502 } 503 504 505 /********************************** 506 * Generate code to adjust the stack pointer by `nbytes` 507 * Params: 508 * cdb = code builder 509 * nbytes = number of bytes to adjust stack pointer 510 */ 511 void cod3_stackadj(ref CodeBuilder cdb, int nbytes) 512 { 513 //printf("cod3_stackadj(%d)\n", nbytes); 514 uint grex = I64 ? REX_W << 16 : 0; 515 uint rm; 516 if (nbytes > 0) 517 rm = modregrm(3,5,SP); // SUB ESP,nbytes 518 else 519 { 520 nbytes = -nbytes; 521 rm = modregrm(3,0,SP); // ADD ESP,nbytes 522 } 523 cdb.genc2(0x81, grex | rm, nbytes); 524 } 525 526 /********************************** 527 * Generate code to align the stack pointer at `nbytes` 528 * Params: 529 * cdb = code builder 530 * nbytes = number of bytes to align stack pointer 531 */ 532 void cod3_stackalign(ref CodeBuilder cdb, int nbytes) 533 { 534 //printf("cod3_stackalign(%d)\n", nbytes); 535 const grex = I64 ? 
REX_W << 16 : 0; 536 const rm = modregrm(3, 4, SP); // AND ESP,-nbytes 537 cdb.genc2(0x81, grex | rm, -nbytes); 538 } 539 540 static if (ELFOBJ) 541 { 542 /* Constructor that links the ModuleReference to the head of 543 * the list pointed to by _Dmoduleref 544 */ 545 void cod3_buildmodulector(Outbuffer* buf, int codeOffset, int refOffset) 546 { 547 /* ret 548 * codeOffset: 549 * pushad 550 * mov EAX,&ModuleReference 551 * mov ECX,_DmoduleRef 552 * mov EDX,[ECX] 553 * mov [EAX],EDX 554 * mov [ECX],EAX 555 * popad 556 * ret 557 */ 558 559 const int seg = CODE; 560 561 if (I64 && config.flags3 & CFG3pic) 562 { // LEA RAX,ModuleReference[RIP] 563 buf.writeByte(REX | REX_W); 564 buf.writeByte(LEA); 565 buf.writeByte(modregrm(0,AX,5)); 566 codeOffset += 3; 567 codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_PC32, 3 /*STI_DATA*/, refOffset - 4); 568 569 // MOV RCX,_DmoduleRef@GOTPCREL[RIP] 570 buf.writeByte(REX | REX_W); 571 buf.writeByte(0x8B); 572 buf.writeByte(modregrm(0,CX,5)); 573 codeOffset += 3; 574 codeOffset += Obj.writerel(seg, codeOffset, R_X86_64_GOTPCREL, Obj.external_def("_Dmodule_ref"), -4); 575 } 576 else 577 { 578 /* movl ModuleReference*, %eax */ 579 buf.writeByte(0xB8); 580 codeOffset += 1; 581 const uint reltype = I64 ? R_X86_64_32 : R_386_32; 582 codeOffset += Obj.writerel(seg, codeOffset, reltype, 3 /*STI_DATA*/, refOffset); 583 584 /* movl _Dmodule_ref, %ecx */ 585 buf.writeByte(0xB9); 586 codeOffset += 1; 587 codeOffset += Obj.writerel(seg, codeOffset, reltype, Obj.external_def("_Dmodule_ref"), 0); 588 } 589 590 if (I64) 591 buf.writeByte(REX | REX_W); 592 buf.writeByte(0x8B); buf.writeByte(0x11); /* movl (%ecx), %edx */ 593 if (I64) 594 buf.writeByte(REX | REX_W); 595 buf.writeByte(0x89); buf.writeByte(0x10); /* movl %edx, (%eax) */ 596 if (I64) 597 buf.writeByte(REX | REX_W); 598 buf.writeByte(0x89); buf.writeByte(0x01); /* movl %eax, (%ecx) */ 599 600 buf.writeByte(0xC3); /* ret */ 601 } 602 603 } 604 605 606 /***************************** 607 * Given a type, return a mask of 608 * registers to hold that type. 609 * Input: 610 * tyf function type 611 */ 612 613 regm_t regmask(tym_t tym, tym_t tyf) 614 { 615 switch (tybasic(tym)) 616 { 617 case TYvoid: 618 case TYstruct: 619 case TYarray: 620 return 0; 621 622 case TYbool: 623 case TYwchar_t: 624 case TYchar16: 625 case TYchar: 626 case TYschar: 627 case TYuchar: 628 case TYshort: 629 case TYushort: 630 case TYint: 631 case TYuint: 632 case TYnullptr: 633 case TYnptr: 634 case TYnref: 635 case TYsptr: 636 case TYcptr: 637 case TYimmutPtr: 638 case TYsharePtr: 639 case TYrestrictPtr: 640 case TYfgPtr: 641 return mAX; 642 643 case TYfloat: 644 case TYifloat: 645 if (I64) 646 return mXMM0; 647 if (config.exe & EX_flat) 648 return mST0; 649 goto case TYlong; 650 651 case TYlong: 652 case TYulong: 653 case TYdchar: 654 if (!I16) 655 return mAX; 656 goto case TYfptr; 657 658 case TYfptr: 659 case TYhptr: 660 return mDX | mAX; 661 662 case TYcent: 663 case TYucent: 664 assert(I64); 665 return mDX | mAX; 666 667 case TYvptr: 668 return mDX | mBX; 669 670 case TYdouble: 671 case TYdouble_alias: 672 case TYidouble: 673 if (I64) 674 return mXMM0; 675 if (config.exe & EX_flat) 676 return mST0; 677 return DOUBLEREGS; 678 679 case TYllong: 680 case TYullong: 681 return I64 ? cast(regm_t) mAX : (I32 ? 
mDX | mAX : DOUBLEREGS); 682 683 case TYldouble: 684 case TYildouble: 685 return mST0; 686 687 case TYcfloat: 688 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 689 { 690 if (I32 && tybasic(tyf) == TYnfunc) 691 return mDX | mAX; 692 } 693 goto case TYcdouble; 694 695 case TYcdouble: 696 if (I64) 697 return mXMM0 | mXMM1; 698 goto case TYcldouble; 699 700 case TYcldouble: 701 return mST01; 702 703 // SIMD vector types 704 case TYfloat4: 705 case TYdouble2: 706 case TYschar16: 707 case TYuchar16: 708 case TYshort8: 709 case TYushort8: 710 case TYlong4: 711 case TYulong4: 712 case TYllong2: 713 case TYullong2: 714 715 case TYfloat8: 716 case TYdouble4: 717 case TYschar32: 718 case TYuchar32: 719 case TYshort16: 720 case TYushort16: 721 case TYlong8: 722 case TYulong8: 723 case TYllong4: 724 case TYullong4: 725 if (!config.fpxmmregs) 726 { printf("SIMD operations not supported on this platform\n"); 727 exit(1); 728 } 729 return mXMM0; 730 731 default: 732 debug WRTYxx(tym); 733 assert(0); 734 } 735 } 736 737 /******************************* 738 * setup register allocator parameters with platform specific data 739 */ 740 void cgreg_dst_regs(reg_t* dst_integer_reg, reg_t* dst_float_reg) 741 { 742 *dst_integer_reg = AX; 743 *dst_float_reg = XMM0; 744 } 745 746 void cgreg_set_priorities(tym_t ty, const(reg_t)** pseq, const(reg_t)** pseqmsw) 747 { 748 const sz = tysize(ty); 749 750 if (tyxmmreg(ty)) 751 { 752 static immutable ubyte[9] sequence = [XMM0,XMM1,XMM2,XMM3,XMM4,XMM5,XMM6,XMM7,NOREG]; 753 *pseq = sequence.ptr; 754 } 755 else if (I64) 756 { 757 if (sz == REGSIZE * 2) 758 { 759 static immutable ubyte[3] seqmsw1 = [CX,DX,NOREG]; 760 static immutable ubyte[5] seqlsw1 = [AX,BX,SI,DI,NOREG]; 761 *pseq = seqlsw1.ptr; 762 *pseqmsw = seqmsw1.ptr; 763 } 764 else 765 { // R10 is reserved for the static link 766 static immutable ubyte[15] sequence2 = [AX,CX,DX,SI,DI,R8,R9,R11,BX,R12,R13,R14,R15,BP,NOREG]; 767 *pseq = cast(ubyte*)sequence2.ptr; 768 } 769 } 770 else if (I32) 771 { 772 if (sz == REGSIZE * 2) 773 { 774 static immutable ubyte[5] seqlsw3 = [AX,BX,SI,DI,NOREG]; 775 static immutable ubyte[3] seqmsw3 = [CX,DX,NOREG]; 776 *pseq = seqlsw3.ptr; 777 *pseqmsw = seqmsw3.ptr; 778 } 779 else 780 { 781 static immutable ubyte[8] sequence4 = [AX,CX,DX,BX,SI,DI,BP,NOREG]; 782 *pseq = sequence4.ptr; 783 } 784 } 785 else 786 { assert(I16); 787 if (typtr(ty)) 788 { 789 // For pointer types, try to pick index register first 790 static immutable ubyte[8] seqidx5 = [BX,SI,DI,AX,CX,DX,BP,NOREG]; 791 *pseq = seqidx5.ptr; 792 } 793 else 794 { 795 // Otherwise, try to pick index registers last 796 static immutable ubyte[8] sequence6 = [AX,CX,DX,BX,SI,DI,BP,NOREG]; 797 *pseq = sequence6.ptr; 798 } 799 } 800 } 801 802 /******************************************* 803 * Call finally block. 
804 * Params: 805 * bf = block to call 806 * retregs = registers to preserve across call 807 * Returns: 808 * code generated 809 */ 810 private code *callFinallyBlock(block *bf, regm_t retregs) 811 { 812 CodeBuilder cdbs; cdbs.ctor(); 813 CodeBuilder cdbr; cdbr.ctor(); 814 int nalign = 0; 815 816 calledFinally = true; 817 uint npush = gensaverestore(retregs,cdbs,cdbr); 818 819 if (STACKALIGN >= 16) 820 { npush += REGSIZE; 821 if (npush & (STACKALIGN - 1)) 822 { nalign = STACKALIGN - (npush & (STACKALIGN - 1)); 823 cod3_stackadj(cdbs, nalign); 824 } 825 } 826 cdbs.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf); 827 regcon.immed.mval = 0; 828 if (nalign) 829 cod3_stackadj(cdbs, -nalign); 830 cdbs.append(cdbr); 831 return cdbs.finish(); 832 } 833 834 /******************************* 835 * Generate block exit code 836 */ 837 void outblkexitcode(ref CodeBuilder cdb, block *bl, ref int anyspill, const(char)* sflsave, Symbol** retsym, const regm_t mfuncregsave) 838 { 839 CodeBuilder cdb2; cdb2.ctor(); 840 elem *e = bl.Belem; 841 block *nextb; 842 regm_t retregs = 0; 843 844 if (bl.BC != BCasm) 845 assert(bl.Bcode == null); 846 847 switch (bl.BC) /* block exit condition */ 848 { 849 case BCiftrue: 850 { 851 bool jcond = true; 852 block *bs1 = bl.nthSucc(0); 853 block *bs2 = bl.nthSucc(1); 854 if (bs1 == bl.Bnext) 855 { // Swap bs1 and bs2 856 block *btmp; 857 858 jcond ^= 1; 859 btmp = bs1; 860 bs1 = bs2; 861 bs2 = btmp; 862 } 863 logexp(cdb,e,jcond,FLblock,cast(code *) bs1); 864 nextb = bs2; 865 } 866 L5: 867 if (configv.addlinenumbers && bl.Bsrcpos.Slinnum && 868 !(funcsym_p.ty() & mTYnaked)) 869 { 870 //printf("BCiftrue: %s(%u)\n", bl.Bsrcpos.Sfilename ? bl.Bsrcpos.Sfilename : "", bl.Bsrcpos.Slinnum); 871 cdb.genlinnum(bl.Bsrcpos); 872 } 873 if (nextb != bl.Bnext) 874 { 875 assert(!(bl.Bflags & BFLepilog)); 876 genjmp(cdb,JMP,FLblock,nextb); 877 } 878 break; 879 880 case BCjmptab: 881 case BCifthen: 882 case BCswitch: 883 { 884 assert(!(bl.Bflags & BFLepilog)); 885 doswitch(cdb,bl); // hide messy details 886 break; 887 } 888 version (MARS) 889 { 890 case BCjcatch: // D catch clause of try-catch 891 assert(ehmethod(funcsym_p) != EHmethod.EH_NONE); 892 // Mark all registers as destroyed. This will prevent 893 // register assignments to variables used in catch blocks. 894 getregs(cdb,lpadregs()); 895 896 if (config.ehmethod == EHmethod.EH_DWARF) 897 { 898 /* Each block must have ESP set to the same value it was at the end 899 * of the prolog. But the unwinder calls catch blocks with ESP set 900 * at the value it was when the throwing function was called, which 901 * may have arguments pushed on the stack. 902 * This instruction will reset ESP to the correct offset from EBP. 903 */ 904 cdb.gen1(ESCAPE | ESCfixesp); 905 } 906 goto case_goto; 907 } 908 version (SCPP) 909 { 910 case BCcatch: // C++ catch clause of try-catch 911 // Mark all registers as destroyed. This will prevent 912 // register assignments to variables used in catch blocks. 913 getregs(cdb,allregs | mES); 914 goto case_goto; 915 916 case BCtry: 917 usednteh |= EHtry; 918 if (config.exe == EX_WIN32) 919 usednteh |= NTEHtry; 920 goto case_goto; 921 } 922 case BCgoto: 923 nextb = bl.nthSucc(0); 924 if ((MARS || 925 funcsym_p.Sfunc.Fflags3 & Fnteh) && 926 ehmethod(funcsym_p) != EHmethod.EH_DWARF && 927 bl.Btry != nextb.Btry && 928 nextb.BC != BC_finally) 929 { 930 regm_t retregsx = 0; 931 gencodelem(cdb,e,&retregsx,true); 932 int toindex = nextb.Btry ? 
nextb.Btry.Bscope_index : -1; 933 assert(bl.Btry); 934 int fromindex = bl.Btry.Bscope_index; 935 version (MARS) 936 { 937 if (toindex + 1 == fromindex) 938 { // Simply call __finally 939 if (bl.Btry && 940 bl.Btry.nthSucc(1).BC == BCjcatch) 941 { 942 goto L5; // it's a try-catch, not a try-finally 943 } 944 } 945 } 946 if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || 947 config.ehmethod == EHmethod.EH_SEH) 948 { 949 nteh_unwind(cdb,0,toindex); 950 } 951 else 952 { 953 version (MARS) 954 { 955 if (toindex + 1 <= fromindex) 956 { 957 //c = cat(c, linux_unwind(0, toindex)); 958 block *bt; 959 960 //printf("B%d: fromindex = %d, toindex = %d\n", bl.Bdfoidx, fromindex, toindex); 961 bt = bl; 962 while ((bt = bt.Btry) != null && bt.Bscope_index != toindex) 963 { block *bf; 964 965 //printf("\tbt.Bscope_index = %d, bt.Blast_index = %d\n", bt.Bscope_index, bt.Blast_index); 966 bf = bt.nthSucc(1); 967 // Only look at try-finally blocks 968 if (bf.BC == BCjcatch) 969 continue; 970 971 if (bf == nextb) 972 continue; 973 //printf("\tbf = B%d, nextb = B%d\n", bf.Bdfoidx, nextb.Bdfoidx); 974 if (nextb.BC == BCgoto && 975 !nextb.Belem && 976 bf == nextb.nthSucc(0)) 977 continue; 978 979 // call __finally 980 cdb.append(callFinallyBlock(bf.nthSucc(0), retregsx)); 981 } 982 } 983 } 984 } 985 goto L5; 986 } 987 case_goto: 988 { 989 regm_t retregsx = 0; 990 gencodelem(cdb,e,&retregsx,true); 991 if (anyspill) 992 { // Add in the epilog code 993 CodeBuilder cdbstore; cdbstore.ctor(); 994 CodeBuilder cdbload; cdbload.ctor(); 995 996 for (int i = 0; i < anyspill; i++) 997 { Symbol *s = globsym.tab[i]; 998 999 if (s.Sflags & SFLspill && 1000 vec_testbit(dfoidx,s.Srange)) 1001 { 1002 s.Sfl = sflsave[i]; // undo block register assignments 1003 cgreg_spillreg_epilog(bl,s,cdbstore,cdbload); 1004 } 1005 } 1006 cdb.append(cdbstore); 1007 cdb.append(cdbload); 1008 } 1009 nextb = bl.nthSucc(0); 1010 goto L5; 1011 } 1012 1013 case BC_try: 1014 if (config.ehmethod == EHmethod.EH_NONE || funcsym_p.Sfunc.Fflags3 & Feh_none) 1015 { 1016 /* Need to use frame pointer to access locals, not the stack pointer, 1017 * because we'll be calling the BC_finally blocks and the stack will be off. 1018 */ 1019 needframe = 1; 1020 } 1021 else if (config.ehmethod == EHmethod.EH_SEH || config.ehmethod == EHmethod.EH_WIN32) 1022 { 1023 usednteh |= NTEH_try; 1024 nteh_usevars(); 1025 } 1026 else 1027 usednteh |= EHtry; 1028 goto case_goto; 1029 1030 case BC_finally: 1031 if (ehmethod(funcsym_p) == EHmethod.EH_DWARF) 1032 { 1033 // Mark scratch registers as destroyed. 1034 getregsNoSave(lpadregs()); 1035 1036 regm_t retregsx = 0; 1037 gencodelem(cdb,bl.Belem,&retregsx,true); 1038 1039 // JMP bl.nthSucc(1) 1040 nextb = bl.nthSucc(1); 1041 1042 goto L5; 1043 } 1044 else 1045 { 1046 if (config.ehmethod == EHmethod.EH_SEH || 1047 config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none)) 1048 { 1049 // Mark all registers as destroyed. This will prevent 1050 // register assignments to variables used in finally blocks. 1051 getregsNoSave(lpadregs()); 1052 } 1053 1054 assert(!e); 1055 // Generate CALL to finalizer code 1056 cdb.append(callFinallyBlock(bl.nthSucc(0), 0)); 1057 1058 // JMP bl.nthSucc(1) 1059 nextb = bl.nthSucc(1); 1060 1061 goto L5; 1062 } 1063 1064 case BC_lpad: 1065 { 1066 assert(ehmethod(funcsym_p) == EHmethod.EH_DWARF); 1067 // Mark all registers as destroyed. This will prevent 1068 // register assignments to variables used in finally blocks. 
1069 getregsNoSave(lpadregs()); 1070 1071 regm_t retregsx = 0; 1072 gencodelem(cdb,bl.Belem,&retregsx,true); 1073 1074 // JMP bl.nthSucc(0) 1075 nextb = bl.nthSucc(0); 1076 goto L5; 1077 } 1078 1079 case BC_ret: 1080 { 1081 regm_t retregsx = 0; 1082 gencodelem(cdb,e,&retregsx,true); 1083 if (ehmethod(funcsym_p) == EHmethod.EH_DWARF) 1084 { 1085 } 1086 else 1087 cdb.gen1(0xC3); // RET 1088 break; 1089 } 1090 1091 static if (NTEXCEPTIONS) 1092 { 1093 case BC_except: 1094 { 1095 assert(!e); 1096 usednteh |= NTEH_except; 1097 nteh_setsp(cdb,0x8B); 1098 getregsNoSave(allregs); 1099 nextb = bl.nthSucc(0); 1100 goto L5; 1101 } 1102 case BC_filter: 1103 { 1104 nteh_filter(cdb, bl); 1105 // Mark all registers as destroyed. This will prevent 1106 // register assignments to variables used in filter blocks. 1107 getregsNoSave(allregs); 1108 regm_t retregsx = regmask(e.Ety, TYnfunc); 1109 gencodelem(cdb,e,&retregsx,true); 1110 cdb.gen1(0xC3); // RET 1111 break; 1112 } 1113 } 1114 1115 case BCretexp: 1116 retregs = regmask(e.Ety, funcsym_p.ty()); 1117 1118 // For the final load into the return regs, don't set regcon.used, 1119 // so that the optimizer can potentially use retregs for register 1120 // variable assignments. 1121 1122 if (config.flags4 & CFG4optimized) 1123 { regm_t usedsave; 1124 1125 docommas(cdb,&e); 1126 usedsave = regcon.used; 1127 if (!OTleaf(e.Eoper)) 1128 gencodelem(cdb,e,&retregs,true); 1129 else 1130 { 1131 if (e.Eoper == OPconst) 1132 regcon.mvar = 0; 1133 gencodelem(cdb,e,&retregs,true); 1134 regcon.used = usedsave; 1135 if (e.Eoper == OPvar) 1136 { Symbol *s = e.EV.Vsym; 1137 1138 if (s.Sfl == FLreg && s.Sregm != mAX) 1139 *retsym = s; 1140 } 1141 } 1142 } 1143 else 1144 { 1145 gencodelem(cdb,e,&retregs,true); 1146 } 1147 goto L4; 1148 1149 case BCret: 1150 case BCexit: 1151 retregs = 0; 1152 gencodelem(cdb,e,&retregs,true); 1153 L4: 1154 if (retregs == mST0) 1155 { assert(global87.stackused == 1); 1156 pop87(); // account for return value 1157 } 1158 else if (retregs == mST01) 1159 { assert(global87.stackused == 2); 1160 pop87(); 1161 pop87(); // account for return value 1162 } 1163 1164 if (bl.BC == BCexit) 1165 { 1166 if (config.flags4 & CFG4optimized) 1167 mfuncreg = mfuncregsave; 1168 } 1169 else if (MARS || usednteh & NTEH_try) 1170 { 1171 block *bt = bl; 1172 while ((bt = bt.Btry) != null) 1173 { 1174 block *bf = bt.nthSucc(1); 1175 version (MARS) 1176 { 1177 // Only look at try-finally blocks 1178 if (bf.BC == BCjcatch) 1179 { 1180 continue; 1181 } 1182 } 1183 if (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || 1184 config.ehmethod == EHmethod.EH_SEH) 1185 { 1186 if (bt.Bscope_index == 0) 1187 { 1188 // call __finally 1189 CodeBuilder cdbs; cdbs.ctor(); 1190 CodeBuilder cdbr; cdbr.ctor(); 1191 1192 nteh_gensindex(cdb,-1); 1193 gensaverestore(retregs,cdbs,cdbr); 1194 cdb.append(cdbs); 1195 cdb.genc(0xE8,0,0,0,FLblock,cast(targ_size_t)bf.nthSucc(0)); 1196 regcon.immed.mval = 0; 1197 cdb.append(cdbr); 1198 } 1199 else 1200 { 1201 nteh_unwind(cdb,retregs,~0); 1202 } 1203 break; 1204 } 1205 else 1206 { 1207 // call __finally 1208 cdb.append(callFinallyBlock(bf.nthSucc(0), retregs)); 1209 } 1210 } 1211 } 1212 break; 1213 1214 case BCasm: 1215 { 1216 assert(!e); 1217 // Mark destroyed registers 1218 CodeBuilder cdbx; cdbx.ctor(); 1219 getregs(cdbx,iasm_regs(bl)); // mark destroyed registers 1220 code *c = cdbx.finish(); 1221 if (bl.Bsucc) 1222 { nextb = bl.nthSucc(0); 1223 if (!bl.Bnext) 1224 { 1225 cdb.append(bl.Bcode); 1226 cdb.append(c); 
1227 goto L5; 1228 } 1229 if (nextb != bl.Bnext && 1230 bl.Bnext && 1231 !(bl.Bnext.BC == BCgoto && 1232 !bl.Bnext.Belem && 1233 nextb == bl.Bnext.nthSucc(0))) 1234 { 1235 // See if already have JMP at end of block 1236 code *cl = code_last(bl.Bcode); 1237 if (!cl || cl.Iop != JMP) 1238 { 1239 cdb.append(bl.Bcode); 1240 cdb.append(c); 1241 goto L5; // add JMP at end of block 1242 } 1243 } 1244 } 1245 cdb.append(bl.Bcode); 1246 break; 1247 } 1248 1249 default: 1250 debug 1251 printf("bl.BC = %d\n",bl.BC); 1252 assert(0); 1253 } 1254 } 1255 1256 /*********************************************** 1257 * Struct necessary for sorting switch cases. 1258 */ 1259 1260 alias _compare_fp_t = extern(C) nothrow int function(const void*, const void*); 1261 extern(C) void qsort(void* base, size_t nmemb, size_t size, _compare_fp_t compar); 1262 1263 extern (C) // qsort cmp functions need to be "C" 1264 { 1265 struct CaseVal 1266 { 1267 targ_ullong val; 1268 block *target; 1269 1270 /* Sort function for qsort() */ 1271 extern (C) static nothrow int cmp(scope const(void*) p, scope const(void*) q) 1272 { 1273 const(CaseVal)* c1 = cast(const(CaseVal)*)p; 1274 const(CaseVal)* c2 = cast(const(CaseVal)*)q; 1275 return (c1.val < c2.val) ? -1 : ((c1.val == c2.val) ? 0 : 1); 1276 } 1277 } 1278 } 1279 1280 /*** 1281 * Generate comparison of [reg2,reg] with val 1282 */ 1283 private void cmpval(ref CodeBuilder cdb, targ_llong val, uint sz, reg_t reg, reg_t reg2, reg_t sreg) 1284 { 1285 if (I64 && sz == 8) 1286 { 1287 assert(reg2 == NOREG); 1288 if (val == cast(int)val) // if val is a 64 bit value sign-extended from 32 bits 1289 { 1290 cdb.genc2(0x81,modregrmx(3,7,reg),cast(targ_size_t)val); // CMP reg,value32 1291 cdb.last().Irex |= REX_W; // 64 bit operand 1292 } 1293 else 1294 { 1295 assert(sreg != NOREG); 1296 movregconst(cdb,sreg,cast(targ_size_t)val,64); // MOV sreg,val64 1297 genregs(cdb,0x3B,reg,sreg); // CMP reg,sreg 1298 code_orrex(cdb.last(), REX_W); 1299 getregsNoSave(mask(sreg)); // don't remember we loaded this constant 1300 } 1301 } 1302 else if (reg2 == NOREG) 1303 cdb.genc2(0x81,modregrmx(3,7,reg),cast(targ_size_t)val); // CMP reg,casevalue 1304 else 1305 { 1306 cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val)); // CMP reg2,MSREG(casevalue) 1307 code *cnext = gennop(null); 1308 genjmp(cdb,JNE,FLcode,cast(block *) cnext); // JNE cnext 1309 cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)val); // CMP reg,casevalue 1310 cdb.append(cnext); 1311 } 1312 } 1313 1314 private void ifthen(ref CodeBuilder cdb, CaseVal *casevals, size_t ncases, 1315 uint sz, reg_t reg, reg_t reg2, reg_t sreg, block *bdefault, bool last) 1316 { 1317 if (ncases >= 4 && config.flags4 & CFG4speed) 1318 { 1319 size_t pivot = ncases >> 1; 1320 1321 // Compares for casevals[0..pivot] 1322 CodeBuilder cdb1; cdb1.ctor(); 1323 ifthen(cdb1, casevals, pivot, sz, reg, reg2, sreg, bdefault, true); 1324 1325 // Compares for casevals[pivot+1..ncases] 1326 CodeBuilder cdb2; cdb2.ctor(); 1327 ifthen(cdb2, casevals + pivot + 1, ncases - pivot - 1, sz, reg, reg2, sreg, bdefault, last); 1328 code *c2 = gennop(null); 1329 1330 // Compare for caseval[pivot] 1331 cmpval(cdb, casevals[pivot].val, sz, reg, reg2, sreg); 1332 genjmp(cdb,JE,FLblock,casevals[pivot].target); // JE target 1333 // Note uint jump here, as cases were sorted using uint comparisons 1334 genjmp(cdb,JA,FLcode,cast(block *) c2); // JG c2 1335 1336 cdb.append(cdb1); 1337 cdb.append(c2); 1338 cdb.append(cdb2); 1339 } 1340 else 1341 { // Not worth doing a binary search, just 
do a sequence of CMP/JE 1342 for (size_t n = 0; n < ncases; n++) 1343 { 1344 targ_llong val = casevals[n].val; 1345 cmpval(cdb, val, sz, reg, reg2, sreg); 1346 code *cnext = null; 1347 if (reg2 != NOREG) 1348 { 1349 cnext = gennop(null); 1350 genjmp(cdb,JNE,FLcode,cast(block *) cnext); // JNE cnext 1351 cdb.genc2(0x81,modregrm(3,7,reg2),cast(targ_size_t)MSREG(val)); // CMP reg2,MSREG(casevalue) 1352 } 1353 genjmp(cdb,JE,FLblock,casevals[n].target); // JE caseaddr 1354 cdb.append(cnext); 1355 } 1356 1357 if (last) // if default is not next block 1358 genjmp(cdb,JMP,FLblock,bdefault); 1359 } 1360 } 1361 1362 /******************************* 1363 * Generate code for blocks ending in a switch statement. 1364 * Take BCswitch and decide on 1365 * BCifthen use if - then code 1366 * BCjmptab index into jump table 1367 * BCswitch search table for match 1368 */ 1369 1370 void doswitch(ref CodeBuilder cdb, block *b) 1371 { 1372 targ_ulong msw; 1373 1374 // If switch tables are in code segment and we need a CS: override to get at them 1375 bool csseg = cast(bool)(config.flags & CFGromable); 1376 1377 //printf("doswitch(%d)\n", b.BC); 1378 elem *e = b.Belem; 1379 elem_debug(e); 1380 docommas(cdb,&e); 1381 cgstate.stackclean++; 1382 tym_t tys = tybasic(e.Ety); 1383 int sz = _tysize[tys]; 1384 bool dword = (sz == 2 * REGSIZE); 1385 bool mswsame = true; // assume all msw's are the same 1386 targ_llong *p = b.Bswitch; // pointer to case data 1387 assert(p); 1388 uint ncases = cast(uint)*p++; // number of cases 1389 1390 targ_llong vmax = MINLL; // smallest possible llong 1391 targ_llong vmin = MAXLL; // largest possible llong 1392 for (uint n = 0; n < ncases; n++) // find max and min case values 1393 { 1394 targ_llong val = *p++; 1395 if (val > vmax) vmax = val; 1396 if (val < vmin) vmin = val; 1397 if (REGSIZE == 2) 1398 { 1399 ushort ms = (val >> 16) & 0xFFFF; 1400 if (n == 0) 1401 msw = ms; 1402 else if (msw != ms) 1403 mswsame = 0; 1404 } 1405 else // REGSIZE == 4 1406 { 1407 targ_ulong ms = (val >> 32) & 0xFFFFFFFF; 1408 if (n == 0) 1409 msw = ms; 1410 else if (msw != ms) 1411 mswsame = 0; 1412 } 1413 } 1414 p -= ncases; 1415 //dbg_printf("vmax = x%lx, vmin = x%lx, vmax-vmin = x%lx\n",vmax,vmin,vmax - vmin); 1416 1417 /* Three kinds of switch strategies - pick one 1418 */ 1419 if (ncases <= 3) 1420 goto Lifthen; 1421 else if (I16 && cast(targ_ullong)(vmax - vmin) <= ncases * 2) 1422 goto Ljmptab; // >=50% of the table is case values, rest is default 1423 else if (cast(targ_ullong)(vmax - vmin) <= ncases * 3) 1424 goto Ljmptab; // >= 33% of the table is case values, rest is default 1425 else if (I16) 1426 goto Lswitch; 1427 else 1428 goto Lifthen; 1429 1430 /*************************************************************************/ 1431 { // generate if-then sequence 1432 Lifthen: 1433 regm_t retregs = ALLREGS; 1434 b.BC = BCifthen; 1435 scodelem(cdb,e,&retregs,0,true); 1436 reg_t reg, reg2; 1437 if (dword) 1438 { reg = findreglsw(retregs); 1439 reg2 = findregmsw(retregs); 1440 } 1441 else 1442 { 1443 reg = findreg(retregs); // reg that result is in 1444 reg2 = NOREG; 1445 } 1446 list_t bl = b.Bsucc; 1447 block *bdefault = b.nthSucc(0); 1448 if (dword && mswsame) 1449 { 1450 cdb.genc2(0x81,modregrm(3,7,reg2),msw); // CMP reg2,MSW 1451 genjmp(cdb,JNE,FLblock,bdefault); // JNE default 1452 reg2 = NOREG; 1453 } 1454 1455 reg_t sreg = NOREG; // may need a scratch register 1456 1457 // Put into casevals[0..ncases] so we can sort then slice 1458 CaseVal *casevals = cast(CaseVal *)malloc(ncases * 
CaseVal.sizeof); 1459 assert(casevals); 1460 for (uint n = 0; n < ncases; n++) 1461 { 1462 casevals[n].val = p[n]; 1463 bl = list_next(bl); 1464 casevals[n].target = list_block(bl); 1465 1466 // See if we need a scratch register 1467 if (sreg == NOREG && I64 && sz == 8 && p[n] != cast(int)p[n]) 1468 { regm_t regm = ALLREGS & ~mask(reg); 1469 allocreg(cdb,®m, &sreg, TYint); 1470 } 1471 } 1472 1473 // Sort cases so we can do a runtime binary search 1474 qsort(casevals, ncases, CaseVal.sizeof, &CaseVal.cmp); 1475 1476 //for (uint n = 0; n < ncases; n++) 1477 //printf("casevals[%lld] = x%x\n", n, casevals[n].val); 1478 1479 // Generate binary tree of comparisons 1480 ifthen(cdb, casevals, ncases, sz, reg, reg2, sreg, bdefault, bdefault != b.Bnext); 1481 1482 free(casevals); 1483 1484 cgstate.stackclean--; 1485 return; 1486 } 1487 1488 /*************************************************************************/ 1489 { 1490 // Use switch value to index into jump table 1491 Ljmptab: 1492 //printf("Ljmptab:\n"); 1493 1494 b.BC = BCjmptab; 1495 1496 /* If vmin is small enough, we can just set it to 0 and the jump 1497 * table entries from 0..vmin-1 can be set with the default target. 1498 * This saves the SUB instruction. 1499 * Must be same computation as used in outjmptab(). 1500 */ 1501 if (vmin > 0 && vmin <= _tysize[TYint]) 1502 vmin = 0; 1503 1504 b.Btablesize = cast(int) (vmax - vmin + 1) * tysize(TYnptr); 1505 regm_t retregs = IDXREGS; 1506 if (dword) 1507 retregs |= mMSW; 1508 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 1509 { 1510 if (I32 && config.flags3 & CFG3pic) 1511 retregs &= ~mBX; // need EBX for GOT 1512 } 1513 bool modify = (I16 || I64 || vmin); 1514 scodelem(cdb,e,&retregs,0,!modify); 1515 reg_t reg = findreg(retregs & IDXREGS); // reg that result is in 1516 reg_t reg2; 1517 if (dword) 1518 reg2 = findregmsw(retregs); 1519 if (modify) 1520 { 1521 assert(!(retregs & regcon.mvar)); 1522 getregs(cdb,retregs); 1523 } 1524 if (vmin) // if there is a minimum 1525 { 1526 cdb.genc2(0x81,modregrm(3,5,reg),cast(targ_size_t)vmin); // SUB reg,vmin 1527 if (dword) 1528 { cdb.genc2(0x81,modregrm(3,3,reg2),cast(targ_size_t)MSREG(vmin)); // SBB reg2,vmin 1529 genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default 1530 } 1531 } 1532 else if (dword) 1533 { gentstreg(cdb,reg2); // TEST reg2,reg2 1534 genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default 1535 } 1536 if (vmax - vmin != REGMASK) // if there is a maximum 1537 { // CMP reg,vmax-vmin 1538 cdb.genc2(0x81,modregrm(3,7,reg),cast(targ_size_t)(vmax-vmin)); 1539 if (I64 && sz == 8) 1540 code_orrex(cdb.last(), REX_W); 1541 genjmp(cdb,JA,FLblock,b.nthSucc(0)); // JA default 1542 } 1543 if (I64) 1544 { 1545 if (!vmin) 1546 { // Need to clear out high 32 bits of reg 1547 // Use 8B instead of 89, as 89 will be optimized away as a NOP 1548 genregs(cdb,0x8B,reg,reg); // MOV reg,reg 1549 } 1550 if (config.flags3 & CFG3pic || config.exe == EX_WIN64) 1551 { 1552 /* LEA R1,disp[RIP] 48 8D 05 00 00 00 00 1553 * MOVSXD R2,[reg*4][R1] 48 63 14 B8 1554 * LEA R1,[R1][R2] 48 8D 04 02 1555 * JMP R1 FF E0 1556 */ 1557 reg_t r1; 1558 regm_t scratchm = ALLREGS & ~mask(reg); 1559 allocreg(cdb,&scratchm,&r1,TYint); 1560 reg_t r2; 1561 scratchm = ALLREGS & ~(mask(reg) | mask(r1)); 1562 allocreg(cdb,&scratchm,&r2,TYint); 1563 1564 CodeBuilder cdbe; cdbe.ctor(); 1565 cdbe.genc1(LEA,(REX_W << 16) | modregxrm(0,r1,5),FLswitch,0); // LEA R1,disp[RIP] 1566 cdbe.last().IEV1.Vswitch = b; 1567 cdbe.gen2sib(0x63,(REX_W << 
16) | modregxrm(0,r2,4), modregxrmx(2,reg,r1)); // MOVSXD R2,[reg*4][R1] 1568 cdbe.gen2sib(LEA,(REX_W << 16) | modregxrm(0,r1,4),modregxrmx(0,r1,r2)); // LEA R1,[R1][R2] 1569 cdbe.gen2(0xFF,modregrmx(3,4,r1)); // JMP R1 1570 1571 b.Btablesize = cast(int) (vmax - vmin + 1) * 4; 1572 code *ce = cdbe.finish(); 1573 pinholeopt(ce, null); 1574 1575 cdb.append(cdbe); 1576 } 1577 else 1578 { 1579 cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0); // JMP disp[reg*8] 1580 cdb.last().IEV1.Vswitch = b; 1581 cdb.last().Isib = modregrm(3,reg & 7,5); 1582 if (reg & 8) 1583 cdb.last().Irex |= REX_X; 1584 } 1585 } 1586 else if (I32) 1587 { 1588 static if (JMPJMPTABLE) 1589 { 1590 /* LEA jreg,offset ctable[reg][reg * 4] 1591 JMP jreg 1592 ctable: 1593 JMP case0 1594 JMP case1 1595 ... 1596 */ 1597 CodeBuilder ctable; ctable.ctor(); 1598 block *bdef = b.nthSucc(0); 1599 targ_llong u; 1600 for (u = vmin; ; u++) 1601 { block *targ = bdef; 1602 for (n = 0; n < ncases; n++) 1603 { 1604 if (p[n] == u) 1605 { targ = b.nthSucc(n + 1); 1606 break; 1607 } 1608 } 1609 genjmp(ctable,JMP,FLblock,targ); 1610 ctable.last().Iflags |= CFjmp5; // don't shrink these 1611 if (u == vmax) 1612 break; 1613 } 1614 1615 // Allocate scratch register jreg 1616 regm_t scratchm = ALLREGS & ~mask(reg); 1617 uint jreg = AX; 1618 allocreg(cdb,&scratchm,&jreg,TYint); 1619 1620 // LEA jreg, offset ctable[reg][reg*4] 1621 cdb.genc1(LEA,modregrm(2,jreg,4),FLcode,6); 1622 cdb.last().Isib = modregrm(2,reg,reg); 1623 cdb.gen2(0xFF,modregrm(3,4,jreg)); // JMP jreg 1624 cdb.append(ctable); 1625 b.Btablesize = 0; 1626 cgstate.stackclean--; 1627 return; 1628 } 1629 else static if (TARGET_OSX) 1630 { 1631 /* CALL L1 1632 * L1: POP R1 1633 * ADD R1,disp[reg*4][R1] 1634 * JMP R1 1635 */ 1636 // Allocate scratch register r1 1637 regm_t scratchm = ALLREGS & ~mask(reg); 1638 reg_t r1; 1639 allocreg(cdb,&scratchm,&r1,TYint); 1640 1641 cdb.genc2(CALL,0,0); // CALL L1 1642 cdb.gen1(0x58 + r1); // L1: POP R1 1643 cdb.genc1(0x03,modregrm(2,r1,4),FLswitch,0); // ADD R1,disp[reg*4][EBX] 1644 cdb.last().IEV1.Vswitch = b; 1645 cdb.last().Isib = modregrm(2,reg,r1); 1646 cdb.gen2(0xFF,modregrm(3,4,r1)); // JMP R1 1647 } 1648 else 1649 { 1650 if (config.flags3 & CFG3pic) 1651 { 1652 /* MOV R1,EBX 1653 * SUB R1,funcsym_p@GOTOFF[offset][reg*4][EBX] 1654 * JMP R1 1655 */ 1656 1657 // Load GOT in EBX 1658 load_localgot(cdb); 1659 1660 // Allocate scratch register r1 1661 regm_t scratchm = ALLREGS & ~(mask(reg) | mBX); 1662 reg_t r1; 1663 allocreg(cdb,&scratchm,&r1,TYint); 1664 1665 genmovreg(cdb,r1,BX); // MOV R1,EBX 1666 cdb.genc1(0x2B,modregxrm(2,r1,4),FLswitch,0); // SUB R1,disp[reg*4][EBX] 1667 cdb.last().IEV1.Vswitch = b; 1668 cdb.last().Isib = modregrm(2,reg,BX); 1669 cdb.gen2(0xFF,modregrmx(3,4,r1)); // JMP R1 1670 } 1671 else 1672 { 1673 cdb.genc1(0xFF,modregrm(0,4,4),FLswitch,0); // JMP disp[idxreg*4] 1674 cdb.last().IEV1.Vswitch = b; 1675 cdb.last().Isib = modregrm(2,reg,5); 1676 } 1677 } 1678 } 1679 else if (I16) 1680 { 1681 cdb.gen2(0xD1,modregrm(3,4,reg)); // SHL reg,1 1682 uint rm = getaddrmode(retregs) | modregrm(0,4,0); 1683 cdb.genc1(0xFF,rm,FLswitch,0); // JMP [CS:]disp[idxreg] 1684 cdb.last().IEV1.Vswitch = b; 1685 cdb.last().Iflags |= csseg ? CFcs : 0; // segment override 1686 } 1687 else 1688 assert(0); 1689 cgstate.stackclean--; 1690 return; 1691 } 1692 1693 /*************************************************************************/ 1694 { 1695 /* Scan a table of case values, and jump to corresponding address. 
1696 * Since it relies on REPNE SCASW, it has really nothing to recommend it 1697 * over Lifthen for 32 and 64 bit code. 1698 * Note that it has not been tested with MACHOBJ (OSX). 1699 */ 1700 Lswitch: 1701 regm_t retregs = mAX; // SCASW requires AX 1702 if (dword) 1703 retregs |= mDX; 1704 else if (ncases <= 6 || config.flags4 & CFG4speed) 1705 goto Lifthen; 1706 scodelem(cdb,e,&retregs,0,true); 1707 if (dword && mswsame) 1708 { /* CMP DX,MSW */ 1709 cdb.genc2(0x81,modregrm(3,7,DX),msw); 1710 genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default 1711 } 1712 getregs(cdb,mCX|mDI); 1713 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 1714 { 1715 if (config.flags3 & CFG3pic) 1716 { // Add in GOT 1717 getregs(cdb,mDX); 1718 cdb.genc2(CALL,0,0); // CALL L1 1719 cdb.gen1(0x58 + DI); // L1: POP EDI 1720 1721 // ADD EDI,_GLOBAL_OFFSET_TABLE_+3 1722 Symbol *gotsym = Obj.getGOTsym(); 1723 cdb.gencs(0x81,modregrm(3,0,DI),FLextern,gotsym); 1724 cdb.last().Iflags = CFoff; 1725 cdb.last().IEV2.Voffset = 3; 1726 1727 makeitextern(gotsym); 1728 1729 genmovreg(cdb, DX, DI); // MOV EDX, EDI 1730 // ADD EDI,offset of switch table 1731 cdb.gencs(0x81,modregrm(3,0,DI),FLswitch,null); 1732 cdb.last().IEV2.Vswitch = b; 1733 } 1734 } 1735 if (!(config.flags3 & CFG3pic)) 1736 { 1737 // MOV DI,offset of switch table 1738 cdb.gencs(0xC7,modregrm(3,0,DI),FLswitch,null); 1739 cdb.last().IEV2.Vswitch = b; 1740 } 1741 movregconst(cdb,CX,ncases,0); // MOV CX,ncases 1742 1743 /* The switch table will be accessed through ES:DI. 1744 * Therefore, load ES with proper segment value. 1745 */ 1746 if (config.flags3 & CFG3eseqds) 1747 { 1748 assert(!csseg); 1749 getregs(cdb,mCX); // allocate CX 1750 } 1751 else 1752 { 1753 getregs(cdb,mES|mCX); // allocate ES and CX 1754 cdb.gen1(csseg ? 0x0E : 0x1E); // PUSH CS/DS 1755 cdb.gen1(0x07); // POP ES 1756 } 1757 1758 targ_size_t disp = (ncases - 1) * _tysize[TYint]; // displacement to jump table 1759 if (dword && !mswsame) 1760 { 1761 1762 /* Build the following: 1763 L1: SCASW 1764 JNE L2 1765 CMP DX,[CS:]disp[DI] 1766 L2: LOOPNE L1 1767 */ 1768 1769 const int mod = (disp > 127) ? 2 : 1; // displacement size 1770 code *cloop = genc2(null,0xE0,0,-7 - mod - csseg); // LOOPNE scasw 1771 cdb.gen1(0xAF); // SCASW 1772 code_orflag(cdb.last(),CFtarg2); // target of jump 1773 genjmp(cdb,JNE,FLcode,cast(block *) cloop); // JNE loop 1774 // CMP DX,[CS:]disp[DI] 1775 cdb.genc1(0x39,modregrm(mod,DX,5),FLconst,disp); 1776 cdb.last().Iflags |= csseg ? CFcs : 0; // possible seg override 1777 cdb.append(cloop); 1778 disp += ncases * _tysize[TYint]; // skip over msw table 1779 } 1780 else 1781 { 1782 cdb.gen1(0xF2); // REPNE 1783 cdb.gen1(0xAF); // SCASW 1784 } 1785 genjmp(cdb,JNE,FLblock,b.nthSucc(0)); // JNE default 1786 const int mod = (disp > 127) ? 2 : 1; // 1 or 2 byte displacement 1787 if (csseg) 1788 cdb.gen1(SEGCS); // table is in code segment 1789 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 1790 { 1791 if (config.flags3 & CFG3pic) 1792 { // ADD EDX,(ncases-1)*2[EDI] 1793 cdb.genc1(0x03,modregrm(mod,DX,7),FLconst,disp); 1794 // JMP EDX 1795 cdb.gen2(0xFF,modregrm(3,4,DX)); 1796 } 1797 } 1798 if (!(config.flags3 & CFG3pic)) 1799 { // JMP (ncases-1)*2[DI] 1800 cdb.genc1(0xFF,modregrm(mod,4,(I32 ? 7 : 5)),FLconst,disp); 1801 cdb.last().Iflags |= csseg ? 
CFcs : 0; 1802 } 1803 b.Btablesize = disp + _tysize[TYint] + ncases * tysize(TYnptr); 1804 //assert(b.Bcode); 1805 cgstate.stackclean--; 1806 return; 1807 } 1808 } 1809 1810 /****************************** 1811 * Output data block for a jump table (BCjmptab). 1812 * The 'holes' in the table get filled with the 1813 * default label. 1814 */ 1815 1816 void outjmptab(block *b) 1817 { 1818 if (JMPJMPTABLE && I32) 1819 return; 1820 1821 targ_llong *p = b.Bswitch; // pointer to case data 1822 size_t ncases = cast(size_t)*p++; // number of cases 1823 1824 /* Find vmin and vmax, the range of the table will be [vmin .. vmax + 1] 1825 * Must be same computation as used in doswitch(). 1826 */ 1827 targ_llong vmax = MINLL; // smallest possible llong 1828 targ_llong vmin = MAXLL; // largest possible llong 1829 for (size_t n = 0; n < ncases; n++) // find min case value 1830 { targ_llong val = p[n]; 1831 if (val > vmax) vmax = val; 1832 if (val < vmin) vmin = val; 1833 } 1834 if (vmin > 0 && vmin <= _tysize[TYint]) 1835 vmin = 0; 1836 assert(vmin <= vmax); 1837 1838 /* Segment and offset into which the jump table will be emitted 1839 */ 1840 int jmpseg = objmod.jmpTableSegment(funcsym_p); 1841 targ_size_t *poffset = &Offset(jmpseg); 1842 1843 /* Align start of jump table 1844 */ 1845 targ_size_t alignbytes = _align(0,*poffset) - *poffset; 1846 objmod.lidata(jmpseg,*poffset,alignbytes); 1847 assert(*poffset == b.Btableoffset); // should match precomputed value 1848 1849 Symbol *gotsym = null; 1850 targ_size_t def = b.nthSucc(0).Boffset; // default address 1851 for (targ_llong u = vmin; ; u++) 1852 { targ_size_t targ = def; // default 1853 for (size_t n = 0; n < ncases; n++) 1854 { if (p[n] == u) 1855 { targ = b.nthSucc(cast(int)(n + 1)).Boffset; 1856 break; 1857 } 1858 } 1859 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 1860 { 1861 if (I64) 1862 { 1863 if (config.flags3 & CFG3pic) 1864 { 1865 objmod.reftodatseg(jmpseg,*poffset,targ + (u - vmin) * 4,funcsym_p.Sseg,CFswitch); 1866 *poffset += 4; 1867 } 1868 else 1869 { 1870 objmod.reftodatseg(jmpseg,*poffset,targ,funcsym_p.Sxtrnnum,CFoffset64 | CFswitch); 1871 *poffset += 8; 1872 } 1873 } 1874 else 1875 { 1876 if (config.flags3 & CFG3pic) 1877 { 1878 assert(config.flags & CFGromable); 1879 // Want a GOTPC fixup to _GLOBAL_OFFSET_TABLE_ 1880 if (!gotsym) 1881 gotsym = Obj.getGOTsym(); 1882 objmod.reftoident(jmpseg,*poffset,gotsym,*poffset - targ,CFswitch); 1883 } 1884 else 1885 objmod.reftocodeseg(jmpseg,*poffset,targ); 1886 *poffset += 4; 1887 } 1888 } 1889 else static if (TARGET_OSX) 1890 { 1891 targ_size_t val; 1892 if (I64) 1893 val = targ - b.Btableoffset; 1894 else 1895 val = targ - b.Btablebase; 1896 objmod.write_bytes(SegData[jmpseg],4,&val); 1897 } 1898 else static if (TARGET_WINDOS) 1899 { 1900 if (I64) 1901 { 1902 targ_size_t val = targ - b.Btableoffset; 1903 objmod.write_bytes(SegData[jmpseg],4,&val); 1904 } 1905 else 1906 { 1907 objmod.reftocodeseg(jmpseg,*poffset,targ); 1908 *poffset += tysize(TYnptr); 1909 } 1910 } 1911 else 1912 assert(0); 1913 1914 if (u == vmax) // for case that (vmax == ~0) 1915 break; 1916 } 1917 } 1918 1919 1920 /****************************** 1921 * Output data block for a switch table. 1922 * Two consecutive tables, the first is the case value table, the 1923 * second is the address table. 
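 *
 * Rough layout of what gets emitted (sizes follow the loops below):
 *      value table     ncases entries, _tysize[TYint] bytes each (lsw of each case value)
 *      msw table       ncases entries, REGSIZE bytes each, only when Btablesize says the cases are dwords
 *      address table   ncases entries, tysize(TYnptr) bytes each (case label addresses)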
1924 */ 1925 1926 void outswitab(block *b) 1927 { 1928 //printf("outswitab()\n"); 1929 targ_llong *p = b.Bswitch; // pointer to case data 1930 uint ncases = cast(uint)*p++; // number of cases 1931 1932 const int seg = objmod.jmpTableSegment(funcsym_p); 1933 targ_size_t *poffset = &Offset(seg); 1934 targ_size_t offset = *poffset; 1935 targ_size_t alignbytes = _align(0,*poffset) - *poffset; 1936 objmod.lidata(seg,*poffset,alignbytes); // any alignment bytes necessary 1937 assert(*poffset == offset + alignbytes); 1938 1939 uint sz = _tysize[TYint]; 1940 assert(SegData[seg].SDseg == seg); 1941 for (uint n = 0; n < ncases; n++) // send out value table 1942 { 1943 //printf("\tcase %d, offset = x%x\n", n, *poffset); 1944 objmod.write_bytes(SegData[seg],sz,p); 1945 p++; 1946 } 1947 offset += alignbytes + sz * ncases; 1948 assert(*poffset == offset); 1949 1950 if (b.Btablesize == ncases * (REGSIZE * 2 + tysize(TYnptr))) 1951 { 1952 // Send out MSW table 1953 p -= ncases; 1954 for (uint n = 0; n < ncases; n++) 1955 { 1956 targ_size_t val = cast(targ_size_t)MSREG(*p); 1957 p++; 1958 objmod.write_bytes(SegData[seg],REGSIZE,&val); 1959 } 1960 offset += REGSIZE * ncases; 1961 assert(*poffset == offset); 1962 } 1963 1964 list_t bl = b.Bsucc; 1965 for (uint n = 0; n < ncases; n++) // send out address table 1966 { 1967 bl = list_next(bl); 1968 objmod.reftocodeseg(seg,*poffset,list_block(bl).Boffset); 1969 *poffset += tysize(TYnptr); 1970 } 1971 assert(*poffset == offset + ncases * tysize(TYnptr)); 1972 } 1973 1974 /***************************** 1975 * Return a jump opcode relevant to the elem for a JMP true. 1976 */ 1977 1978 int jmpopcode(elem *e) 1979 { 1980 tym_t tym; 1981 int zero,i,jp,op; 1982 static immutable ubyte[6][2][2] jops = 1983 [ /* <= > < >= == != <=0 >0 <0 >=0 ==0 !=0 */ 1984 [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JS ,JNS,JE ,JNE] ], /* signed */ 1985 [ [JBE,JA ,JB ,JAE,JE ,JNE],[JE ,JNE,JB ,JAE,JE ,JNE] ], /* uint */ 1986 /+ 1987 [ [JLE,JG ,JL ,JGE,JE ,JNE],[JLE,JG ,JL ,JGE,JE ,JNE] ], /* real */ 1988 [ [JBE,JA ,JB ,JAE,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087 */ 1989 [ [JA ,JBE,JAE,JB ,JE ,JNE],[JBE,JA ,JB ,JAE,JE ,JNE] ], /* 8087 R */ 1990 +/ 1991 ]; 1992 1993 enum 1994 { 1995 XP = (JP << 8), 1996 XNP = (JNP << 8), 1997 } 1998 static immutable uint[26][1] jfops = 1999 /* le gt lt ge eqeq ne unord lg leg ule ul uge */ 2000 [ 2001 [ XNP|JBE,JA,XNP|JB,JAE,XNP|JE, XP|JNE,JP, JNE,JNP, JBE,JC,XP|JAE, 2002 2003 /* ug ue ngt nge nlt nle ord nlg nleg nule nul nuge nug nue */ 2004 XP|JA,JE,JBE,JB, XP|JAE,XP|JA, JNP,JE, JP, JA, JNC,XNP|JB, XNP|JBE,JNE ], /* 8087 */ 2005 ]; 2006 2007 assert(e); 2008 while (e.Eoper == OPcomma || 2009 /* The OTleaf(e.EV.E1.Eoper) is to line up with the case in cdeq() where */ 2010 /* we decide if mPSW is passed on when evaluating E2 or not. 
*/ 2011 (e.Eoper == OPeq && OTleaf(e.EV.E1.Eoper))) 2012 { 2013 e = e.EV.E2; /* right operand determines it */ 2014 } 2015 2016 op = e.Eoper; 2017 tym_t tymx = tybasic(e.Ety); 2018 bool needsNanCheck = tyfloating(tymx) && config.inline8087 && 2019 (tymx == TYldouble || tymx == TYildouble || tymx == TYcldouble || 2020 tymx == TYcdouble || tymx == TYcfloat || 2021 (tyxmmreg(tymx) && config.fpxmmregs && e.Ecount != e.Ecomsub) || 2022 op == OPind || 2023 (OTcall(op) && (regmask(tymx, tybasic(e.EV.E1.Eoper)) & (mST0 | XMMREGS)))); 2024 if (e.Ecount != e.Ecomsub) // comsubs just get Z bit set 2025 { 2026 if (needsNanCheck) // except for floating point values that need a NaN check 2027 return XP|JNE; 2028 else 2029 return JNE; 2030 } 2031 if (!OTrel(op)) // not relational operator 2032 { 2033 if (needsNanCheck) 2034 return XP|JNE; 2035 2036 if (op == OPu32_64) { e = e.EV.E1; op = e.Eoper; } 2037 if (op == OPu16_32) { e = e.EV.E1; op = e.Eoper; } 2038 if (op == OPu8_16) op = e.EV.E1.Eoper; 2039 return ((op >= OPbt && op <= OPbts) || op == OPbtst) ? JC : JNE; 2040 } 2041 2042 if (e.EV.E2.Eoper == OPconst) 2043 zero = !boolres(e.EV.E2); 2044 else 2045 zero = 0; 2046 2047 tym = e.EV.E1.Ety; 2048 if (tyfloating(tym)) 2049 { 2050 static if (1) 2051 { 2052 i = 0; 2053 if (config.inline8087) 2054 { i = 1; 2055 2056 static if (1) 2057 { 2058 if (rel_exception(op) || config.flags4 & CFG4fastfloat) 2059 { 2060 const bool NOSAHF = (I64 || config.fpxmmregs); 2061 if (zero) 2062 { 2063 if (NOSAHF) 2064 op = swaprel(op); 2065 } 2066 else if (NOSAHF) 2067 op = swaprel(op); 2068 else if (cmporder87(e.EV.E2)) 2069 op = swaprel(op); 2070 else 2071 { } 2072 } 2073 else 2074 { 2075 if (zero && config.target_cpu < TARGET_80386) 2076 { } 2077 else 2078 op = swaprel(op); 2079 } 2080 } 2081 else 2082 { 2083 if (zero && !rel_exception(op) && config.target_cpu >= TARGET_80386) 2084 op = swaprel(op); 2085 else if (!zero && 2086 (cmporder87(e.EV.E2) || !(rel_exception(op) || config.flags4 & CFG4fastfloat))) 2087 /* compare is reversed */ 2088 op = swaprel(op); 2089 } 2090 } 2091 jp = jfops[0][op - OPle]; 2092 goto L1; 2093 } 2094 else 2095 { 2096 i = (config.inline8087) ? (3 + cmporder87(e.EV.E2)) : 2; 2097 } 2098 } 2099 else if (tyuns(tym) || tyuns(e.EV.E2.Ety)) 2100 i = 1; 2101 else if (tyintegral(tym) || typtr(tym)) 2102 i = 0; 2103 else 2104 { 2105 debug 2106 elem_print(e); 2107 WRTYxx(tym); 2108 assert(0); 2109 } 2110 2111 jp = jops[i][zero][op - OPle]; /* table starts with OPle */ 2112 2113 /* Try to rewrite uint comparisons so they rely on just the Carry flag 2114 */ 2115 if (i == 1 && (jp == JA || jp == JBE) && 2116 (e.EV.E2.Eoper != OPconst && e.EV.E2.Eoper != OPrelconst)) 2117 { 2118 jp = (jp == JA) ? JC : JNC; 2119 } 2120 2121 L1: 2122 debug 2123 if ((jp & 0xF0) != 0x70) 2124 { 2125 WROP(op); 2126 printf("i %d zero %d op x%x jp x%x\n",i,zero,op,jp); 2127 } 2128 2129 assert((jp & 0xF0) == 0x70); 2130 return jp; 2131 } 2132 2133 /********************************** 2134 * Append code to cdb which validates pointer described by 2135 * addressing mode in *pcs. Modify addressing mode in *pcs. 
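 *
 * The appended code is roughly (a sketch of the 16-bit far-data case; the
 * 32-bit near case omits the segment push):
 *      PUSH    segreg          // DS/SS/CS/ES of the original EA
 *      PUSH    reg             // offset part of the pointer
 *      CALL    __ptrchk        // RTLSYM_PTRCHK validation routine
 * after which *pcs is rewritten to a plain 0[reg] addressing mode.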
2136 * Params: 2137 * cdb = append generated code to this 2138 * pcs = original addressing mode to be updated 2139 * keepmsk = mask of registers we must not destroy or use 2140 * if (keepmsk & RMstore), this will be only a store operation 2141 * into the lvalue 2142 */ 2143 2144 void cod3_ptrchk(ref CodeBuilder cdb,code *pcs,regm_t keepmsk) 2145 { 2146 ubyte sib; 2147 reg_t reg; 2148 uint flagsave; 2149 2150 assert(!I64); 2151 if (!I16 && pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs)) 2152 return; // not designed to deal with 48 bit far pointers 2153 2154 ubyte rm = pcs.Irm; 2155 assert(!(rm & 0x40)); // no disp8 or reg addressing modes 2156 2157 // If the addressing mode is already a register 2158 reg = rm & 7; 2159 if (I16) 2160 { static immutable ubyte[8] imode = [ BP,BP,BP,BP,SI,DI,BP,BX ]; 2161 2162 reg = imode[reg]; // convert [SI] to SI, etc. 2163 } 2164 regm_t idxregs = mask(reg); 2165 if ((rm & 0x80 && (pcs.IFL1 != FLoffset || pcs.IEV1.Vuns)) || 2166 !(idxregs & ALLREGS) 2167 ) 2168 { 2169 // Load the offset into a register, so we can push the address 2170 regm_t idxregs2 = (I16 ? IDXREGS : ALLREGS) & ~keepmsk; // only these can be index regs 2171 assert(idxregs2); 2172 allocreg(cdb,&idxregs2,®,TYoffset); 2173 2174 const opsave = pcs.Iop; 2175 flagsave = pcs.Iflags; 2176 pcs.Iop = LEA; 2177 pcs.Irm |= modregrm(0,reg,0); 2178 pcs.Iflags &= ~(CFopsize | CFss | CFes | CFcs); // no prefix bytes needed 2179 cdb.gen(pcs); // LEA reg,EA 2180 2181 pcs.Iflags = flagsave; 2182 pcs.Iop = opsave; 2183 } 2184 2185 // registers destroyed by the function call 2186 //used = (mBP | ALLREGS | mES) & ~fregsaved; 2187 regm_t used = 0; // much less code generated this way 2188 2189 code *cs2 = null; 2190 regm_t tosave = used & (keepmsk | idxregs); 2191 for (int i = 0; tosave; i++) 2192 { 2193 regm_t mi = mask(i); 2194 2195 assert(i < REGMAX); 2196 if (mi & tosave) /* i = register to save */ 2197 { 2198 int push,pop; 2199 2200 stackchanged = 1; 2201 if (i == ES) 2202 { push = 0x06; 2203 pop = 0x07; 2204 } 2205 else 2206 { push = 0x50 + i; 2207 pop = push | 8; 2208 } 2209 cdb.gen1(push); // PUSH i 2210 cs2 = cat(gen1(null,pop),cs2); // POP i 2211 tosave &= ~mi; 2212 } 2213 } 2214 2215 // For 16 bit models, push a far pointer 2216 if (I16) 2217 { 2218 int segreg; 2219 2220 switch (pcs.Iflags & (CFes | CFss | CFcs | CFds | CFfs | CFgs)) 2221 { case CFes: segreg = 0x06; break; 2222 case CFss: segreg = 0x16; break; 2223 case CFcs: segreg = 0x0E; break; 2224 case 0: segreg = 0x1E; break; // DS 2225 default: 2226 assert(0); 2227 } 2228 2229 // See if we should default to SS: 2230 // (Happens when BP is part of the addressing mode) 2231 if (segreg == 0x1E && (rm & 0xC0) != 0xC0 && 2232 rm & 2 && (rm & 7) != 7) 2233 { 2234 segreg = 0x16; 2235 if (config.wflags & WFssneds) 2236 pcs.Iflags |= CFss; // because BP won't be there anymore 2237 } 2238 cdb.gen1(segreg); // PUSH segreg 2239 } 2240 2241 cdb.gen1(0x50 + reg); // PUSH reg 2242 2243 // Rewrite the addressing mode in *pcs so it is just 0[reg] 2244 setaddrmode(pcs, idxregs); 2245 pcs.IFL1 = FLoffset; 2246 pcs.IEV1.Vuns = 0; 2247 2248 // Call the validation function 2249 { 2250 makeitextern(getRtlsym(RTLSYM_PTRCHK)); 2251 2252 used &= ~(keepmsk | idxregs); // regs destroyed by this exercise 2253 getregs(cdb,used); 2254 // CALL __ptrchk 2255 cdb.gencs((LARGECODE) ? 
0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM_PTRCHK)); 2256 } 2257 2258 cdb.append(cs2); 2259 } 2260 2261 /*********************************** 2262 * Determine if BP can be used as a general purpose register. 2263 * Note parallels between this routine and prolog(). 2264 * Returns: 2265 * 0 can't be used, needed for frame 2266 * mBP can be used 2267 */ 2268 2269 regm_t cod3_useBP() 2270 { 2271 tym_t tym; 2272 tym_t tyf; 2273 2274 // Note that DOSX memory model cannot use EBP as a general purpose 2275 // register, as SS != DS. 2276 if (!(config.exe & EX_flat) || config.flags & (CFGalwaysframe | CFGnoebp)) 2277 goto Lcant; 2278 2279 if (anyiasm) 2280 goto Lcant; 2281 2282 tyf = funcsym_p.ty(); 2283 if (tyf & mTYnaked) // if no prolog/epilog for function 2284 goto Lcant; 2285 2286 if (funcsym_p.Sfunc.Fflags3 & Ffakeeh) 2287 { 2288 goto Lcant; // need consistent stack frame 2289 } 2290 2291 tym = tybasic(tyf); 2292 if (tym == TYifunc) 2293 goto Lcant; 2294 2295 stackoffsets(0); 2296 localsize = Auto.offset + Fast.offset; // an estimate only 2297 // if (localsize) 2298 { 2299 if (!(config.flags4 & CFG4speed) || 2300 config.target_cpu < TARGET_Pentium || 2301 tyfarfunc(tym) || 2302 config.flags & CFGstack || 2303 localsize >= 0x100 || // arbitrary value < 0x1000 2304 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru)) || 2305 calledFinally || 2306 Alloca.size 2307 ) 2308 goto Lcant; 2309 } 2310 return mBP; 2311 2312 Lcant: 2313 return 0; 2314 } 2315 2316 /************************************************* 2317 * Generate code segment to be used later to restore a cse 2318 */ 2319 2320 bool cse_simple(code *c, elem *e) 2321 { 2322 regm_t regm; 2323 reg_t reg; 2324 int sz = tysize(e.Ety); 2325 2326 if (!I16 && // don't bother with 16 bit code 2327 e.Eoper == OPadd && 2328 sz == REGSIZE && 2329 e.EV.E2.Eoper == OPconst && 2330 e.EV.E1.Eoper == OPvar && 2331 isregvar(e.EV.E1,®m,®) && 2332 !(e.EV.E1.EV.Vsym.Sflags & SFLspill) 2333 ) 2334 { 2335 memset(c,0,(*c).sizeof); 2336 2337 // Make this an LEA instruction 2338 c.Iop = LEA; 2339 buildEA(c,reg,-1,1,e.EV.E2.EV.Vuns); 2340 if (I64) 2341 { if (sz == 8) 2342 c.Irex |= REX_W; 2343 } 2344 2345 return true; 2346 } 2347 else if (e.Eoper == OPind && 2348 sz <= REGSIZE && 2349 e.EV.E1.Eoper == OPvar && 2350 isregvar(e.EV.E1,®m,®) && 2351 (I32 || I64 || regm & IDXREGS) && 2352 !(e.EV.E1.EV.Vsym.Sflags & SFLspill) 2353 ) 2354 { 2355 memset(c,0,(*c).sizeof); 2356 2357 // Make this a MOV instruction 2358 c.Iop = (sz == 1) ? 0x8A : 0x8B; // MOV reg,EA 2359 buildEA(c,reg,-1,1,0); 2360 if (sz == 2 && I32) 2361 c.Iflags |= CFopsize; 2362 else if (I64) 2363 { if (sz == 8) 2364 c.Irex |= REX_W; 2365 } 2366 2367 return true; 2368 } 2369 return false; 2370 } 2371 2372 /************************** 2373 * Store `reg` to the common subexpression save area in index `slot`. 2374 * Params: 2375 * cdb = where to write code to 2376 * tym = type of value that's in `reg` 2377 * reg = register to save 2378 * slot = index into common subexpression save area 2379 */ 2380 void gen_storecse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot) 2381 { 2382 // MOV slot[BP],reg 2383 if (isXMMreg(reg) && config.fpxmmregs) // watch out for ES 2384 { 2385 const aligned = tyvector(tym) ? 
STACKALIGN >= 16 : true; 2386 const op = xmmstore(tym, aligned); 2387 cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot); 2388 return; 2389 } 2390 opcode_t op = STO; // normal mov 2391 if (reg == ES) 2392 { 2393 reg = 0; // the real reg number 2394 op = 0x8C; // segment reg mov 2395 } 2396 cdb.genc1(op,modregxrm(2, reg, BPRM),FLcs,cast(targ_uns)slot); 2397 if (I64) 2398 code_orrex(cdb.last(), REX_W); 2399 } 2400 2401 void gen_testcse(ref CodeBuilder cdb, tym_t tym, uint sz, size_t slot) 2402 { 2403 // CMP slot[BP],0 2404 cdb.genc(sz == 1 ? 0x80 : 0x81,modregrm(2,7,BPRM), 2405 FLcs,cast(targ_uns)slot, FLconst,cast(targ_uns) 0); 2406 if ((I64 || I32) && sz == 2) 2407 cdb.last().Iflags |= CFopsize; 2408 if (I64 && sz == 8) 2409 code_orrex(cdb.last(), REX_W); 2410 } 2411 2412 void gen_loadcse(ref CodeBuilder cdb, tym_t tym, reg_t reg, size_t slot) 2413 { 2414 // MOV reg,slot[BP] 2415 if (isXMMreg(reg) && config.fpxmmregs) 2416 { 2417 const aligned = tyvector(tym) ? STACKALIGN >= 16 : true; 2418 const op = xmmload(tym, aligned); 2419 cdb.genc1(op,modregxrm(2, reg - XMM0, BPRM),FLcs,cast(targ_size_t)slot); 2420 return; 2421 } 2422 opcode_t op = LOD; 2423 if (reg == ES) 2424 { 2425 op = 0x8E; 2426 reg = 0; 2427 } 2428 cdb.genc1(op,modregxrm(2,reg,BPRM),FLcs,cast(targ_uns)slot); 2429 if (I64) 2430 code_orrex(cdb.last(), REX_W); 2431 } 2432 2433 /*************************************** 2434 * Gen code for OPframeptr 2435 */ 2436 2437 void cdframeptr(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 2438 { 2439 regm_t retregs = *pretregs & allregs; 2440 if (!retregs) 2441 retregs = allregs; 2442 reg_t reg; 2443 allocreg(cdb,&retregs, ®, TYint); 2444 2445 code cs; 2446 cs.Iop = ESCAPE | ESCframeptr; 2447 cs.Iflags = 0; 2448 cs.Irex = 0; 2449 cs.Irm = cast(ubyte)reg; 2450 cdb.gen(&cs); 2451 fixresult(cdb,e,retregs,pretregs); 2452 } 2453 2454 /*************************************** 2455 * Gen code for load of _GLOBAL_OFFSET_TABLE_. 2456 * This value gets cached in the local variable 'localgot'. 2457 */ 2458 2459 void cdgot(ref CodeBuilder cdb, elem *e, regm_t *pretregs) 2460 { 2461 static if (TARGET_OSX) 2462 { 2463 regm_t retregs = *pretregs & allregs; 2464 if (!retregs) 2465 retregs = allregs; 2466 reg_t reg; 2467 allocreg(cdb,&retregs, ®, TYnptr); 2468 2469 cdb.genc(CALL,0,0,0,FLgot,0); // CALL L1 2470 cdb.gen1(0x58 + reg); // L1: POP reg 2471 2472 fixresult(cdb,e,retregs,pretregs); 2473 } 2474 else static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 2475 { 2476 regm_t retregs = *pretregs & allregs; 2477 if (!retregs) 2478 retregs = allregs; 2479 reg_t reg; 2480 allocreg(cdb,&retregs, ®, TYnptr); 2481 2482 cdb.genc2(CALL,0,0); // CALL L1 2483 cdb.gen1(0x58 + reg); // L1: POP reg 2484 2485 // ADD reg,_GLOBAL_OFFSET_TABLE_+3 2486 Symbol *gotsym = Obj.getGOTsym(); 2487 cdb.gencs(0x81,modregrm(3,0,reg),FLextern,gotsym); 2488 /* Because the 2:3 offset from L1: is hardcoded, 2489 * this sequence of instructions must not 2490 * have any instructions in between, 2491 * so set CFvolatile to prevent the scheduler from rearranging it. 2492 */ 2493 code *cgot = cdb.last(); 2494 cgot.Iflags = CFoff | CFvolatile; 2495 cgot.IEV2.Voffset = (reg == AX) ? 2 : 3; 2496 2497 makeitextern(gotsym); 2498 fixresult(cdb,e,retregs,pretregs); 2499 } 2500 else 2501 assert(0); 2502 } 2503 2504 /************************************************** 2505 * Load contents of localgot into EBX. 
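 * Only does anything for 32 bit position independent code (the
 * config.flags3 & CFG3pic && I32 check below). EBX is the GOT base register
 * in the i386 ELF ABI, which is presumably why mBX is the hard wired
 * destination here.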
2506 */ 2507 2508 void load_localgot(ref CodeBuilder cdb) 2509 { 2510 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 2511 { 2512 if (config.flags3 & CFG3pic && I32) 2513 { 2514 if (localgot && !(localgot.Sflags & SFLdead)) 2515 { 2516 localgot.Sflags &= ~GTregcand; // because this hack doesn't work with reg allocator 2517 elem *e = el_var(localgot); 2518 regm_t retregs = mBX; 2519 codelem(cdb,e,&retregs,false); 2520 el_free(e); 2521 } 2522 else 2523 { 2524 elem *e = el_long(TYnptr, 0); 2525 e.Eoper = OPgot; 2526 regm_t retregs = mBX; 2527 codelem(cdb,e,&retregs,false); 2528 el_free(e); 2529 } 2530 } 2531 } 2532 } 2533 2534 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 2535 { 2536 /***************************** 2537 * Returns: 2538 * # of bytes stored 2539 */ 2540 2541 2542 private int obj_namestring(char *p,const(char)* name) 2543 { 2544 size_t len = strlen(name); 2545 if (len > 255) 2546 { 2547 short *ps = cast(short *)p; 2548 p[0] = 0xFF; 2549 p[1] = 0; 2550 ps[1] = cast(short)len; 2551 memcpy(p + 4,name,len); 2552 const int ONS_OHD = 4; // max # of extra bytes added by obj_namestring() 2553 len += ONS_OHD; 2554 } 2555 else 2556 { 2557 p[0] = cast(char)len; 2558 memcpy(p + 1,name,len); 2559 len++; 2560 } 2561 return cast(int)len; 2562 } 2563 } 2564 2565 void genregs(ref CodeBuilder cdb,opcode_t op,uint dstreg,uint srcreg) 2566 { 2567 return cdb.gen2(op,modregxrmx(3,dstreg,srcreg)); 2568 } 2569 2570 void gentstreg(ref CodeBuilder cdb, uint t) 2571 { 2572 cdb.gen2(0x85,modregxrmx(3,t,t)); // TEST t,t 2573 code_orflag(cdb.last(),CFpsw); 2574 } 2575 2576 void genpush(ref CodeBuilder cdb, reg_t reg) 2577 { 2578 cdb.gen1(0x50 + (reg & 7)); 2579 if (reg & 8) 2580 code_orrex(cdb.last(), REX_B); 2581 } 2582 2583 void genpop(ref CodeBuilder cdb, reg_t reg) 2584 { 2585 cdb.gen1(0x58 + (reg & 7)); 2586 if (reg & 8) 2587 code_orrex(cdb.last(), REX_B); 2588 } 2589 2590 /************************** 2591 * Generate a MOV to,from register instruction. 2592 * Smart enough to dump redundant register moves, and segment 2593 * register moves. 2594 */ 2595 2596 code *genmovreg(uint to,uint from) 2597 { 2598 CodeBuilder cdb; cdb.ctor(); 2599 genmovreg(cdb, to, from); 2600 return cdb.finish(); 2601 } 2602 2603 void genmovreg(ref CodeBuilder cdb,uint to,uint from) 2604 { 2605 genmovreg(cdb, to, from, TYMAX); 2606 } 2607 2608 void genmovreg(ref CodeBuilder cdb, uint to, uint from, tym_t tym) 2609 { 2610 // register kind. ex: GPR,XMM,SEG 2611 static uint _K(uint reg) 2612 { 2613 switch (reg) 2614 { 2615 case ES: return ES; 2616 case XMM15: 2617 case XMM0: .. case XMM7: return XMM0; 2618 case AX: .. 
case R15: return AX; 2619 default: return reg; 2620 } 2621 } 2622 2623 // kind combination (order kept) 2624 static uint _X(uint to, uint from) { return (_K(to) << 8) + _K(from); } 2625 2626 if (to != from) 2627 { 2628 if (tym == TYMAX) tym = TYsize_t; // avoid register slicing 2629 switch (_X(to, from)) 2630 { 2631 case _X(AX, AX): 2632 genregs(cdb, 0x89, from, to); // MOV to,from 2633 if (I64 && tysize(tym) >= 8) 2634 code_orrex(cdb.last(), REX_W); 2635 else if (tysize(tym) == 2) 2636 code_orflag(cdb.last(), CFopsize); 2637 break; 2638 2639 case _X(XMM0, XMM0): // MOVD/Q to,from 2640 genregs(cdb, xmmload(tym), to-XMM0, from-XMM0); 2641 checkSetVex(cdb.last(), tym); 2642 break; 2643 2644 case _X(AX, XMM0): // MOVD/Q to,from 2645 genregs(cdb, STOD, from-XMM0, to); 2646 if (I64 && tysize(tym) >= 8) 2647 code_orrex(cdb.last(), REX_W); 2648 checkSetVex(cdb.last(), tym); 2649 break; 2650 2651 case _X(XMM0, AX): // MOVD/Q to,from 2652 genregs(cdb, LODD, to-XMM0, from); 2653 if (I64 && tysize(tym) >= 8) 2654 code_orrex(cdb.last(), REX_W); 2655 checkSetVex(cdb.last(), tym); 2656 break; 2657 2658 case _X(ES, AX): 2659 assert(tysize(tym) <= REGSIZE); 2660 genregs(cdb, 0x8E, 0, from); 2661 break; 2662 2663 case _X(AX, ES): 2664 assert(tysize(tym) <= REGSIZE); 2665 genregs(cdb, 0x8C, 0, to); 2666 break; 2667 2668 default: 2669 debug printf("genmovreg(to = %s, from = %s)\n" 2670 , regm_str(mask(to)), regm_str(mask(from))); 2671 assert(0); 2672 } 2673 } 2674 } 2675 2676 /*************************************** 2677 * Generate immediate multiply instruction for r1=r2*imm. 2678 * Optimize it into LEA's if we can. 2679 */ 2680 2681 void genmulimm(ref CodeBuilder cdb,uint r1,uint r2,targ_int imm) 2682 { 2683 // These optimizations should probably be put into pinholeopt() 2684 switch (imm) 2685 { 2686 case 1: 2687 genmovreg(cdb,r1,r2); 2688 break; 2689 2690 case 5: 2691 { 2692 code cs; 2693 cs.Iop = LEA; 2694 cs.Iflags = 0; 2695 cs.Irex = 0; 2696 buildEA(&cs,r2,r2,4,0); 2697 cs.orReg(r1); 2698 cdb.gen(&cs); 2699 break; 2700 } 2701 2702 default: 2703 cdb.genc2(0x69,modregxrmx(3,r1,r2),imm); // IMUL r1,r2,imm 2704 break; 2705 } 2706 } 2707 2708 /****************************** 2709 * Load CX with the value of _AHSHIFT. 2710 */ 2711 2712 void genshift(ref CodeBuilder cdb) 2713 { 2714 version (SCPP) 2715 { 2716 // Set up ahshift to trick ourselves into giving the right fixup, 2717 // which must be seg-relative, external frame, external target. 2718 cdb.gencs(0xC7,modregrm(3,0,CX),FLfunc,getRtlsym(RTLSYM_AHSHIFT)); 2719 cdb.last().Iflags |= CFoff; 2720 } 2721 else 2722 assert(0); 2723 } 2724 2725 /****************************** 2726 * Move constant value into reg. 2727 * Take advantage of existing values in registers. 
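 * For example, loading 0 when the flags may be clobbered typically becomes
 * XOR reg,reg, a value already sitting in another register becomes a plain
 * MOV, and a value one away from what the register already holds becomes
 * INC or DEC (see the flag description below for what is permitted).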
2728 * If flags & mPSW 2729 * set flags based on result 2730 * Else if flags & 8 2731 * do not disturb flags 2732 * Else 2733 * don't care about flags 2734 * If flags & 1 then byte move 2735 * If flags & 2 then short move (for I32 and I64) 2736 * If flags & 4 then don't disturb unused portion of register 2737 * If flags & 16 then reg is a byte register AL..BH 2738 * If flags & 64 (0x40) then 64 bit move (I64 only) 2739 * Returns: 2740 * code (if any) generated 2741 */ 2742 2743 void movregconst(ref CodeBuilder cdb,reg_t reg,targ_size_t value,regm_t flags) 2744 { 2745 reg_t r; 2746 regm_t mreg; 2747 2748 //printf("movregconst(reg=%s, value= %lld (%llx), flags=%x)\n", regm_str(mask(reg)), value, value, flags); 2749 2750 regm_t regm = regcon.immed.mval & mask(reg); 2751 targ_size_t regv = regcon.immed.value[reg]; 2752 2753 if (flags & 1) // 8 bits 2754 { 2755 value &= 0xFF; 2756 regm &= BYTEREGS; 2757 2758 // If we already have the right value in the right register 2759 if (regm && (regv & 0xFF) == value) 2760 goto L2; 2761 2762 if (flags & 16 && reg & 4 && // if an H byte register 2763 regcon.immed.mval & mask(reg & 3) && 2764 (((regv = regcon.immed.value[reg & 3]) >> 8) & 0xFF) == value) 2765 goto L2; 2766 2767 /* Avoid byte register loads on Pentium Pro and Pentium II 2768 * to avoid dependency stalls. 2769 */ 2770 if (config.flags4 & CFG4speed && 2771 config.target_cpu >= TARGET_PentiumPro && !(flags & 4)) 2772 goto L3; 2773 2774 // See if another register has the right value 2775 r = 0; 2776 for (mreg = (regcon.immed.mval & BYTEREGS); mreg; mreg >>= 1) 2777 { 2778 if (mreg & 1) 2779 { 2780 if ((regcon.immed.value[r] & 0xFF) == value) 2781 { 2782 genregs(cdb,0x8A,reg,r); // MOV regL,rL 2783 if (I64 && reg >= 4 || r >= 4) 2784 code_orrex(cdb.last(), REX); 2785 goto L2; 2786 } 2787 if (!(I64 && reg >= 4) && 2788 r < 4 && ((regcon.immed.value[r] >> 8) & 0xFF) == value) 2789 { 2790 genregs(cdb,0x8A,reg,r | 4); // MOV regL,rH 2791 goto L2; 2792 } 2793 } 2794 r++; 2795 } 2796 2797 if (value == 0 && !(flags & 8)) 2798 { 2799 if (!(flags & 4) && // if we can set the whole register 2800 !(flags & 16 && reg & 4)) // and reg is not an H register 2801 { 2802 genregs(cdb,0x31,reg,reg); // XOR reg,reg 2803 regimmed_set(reg,value); 2804 regv = 0; 2805 } 2806 else 2807 genregs(cdb,0x30,reg,reg); // XOR regL,regL 2808 flags &= ~mPSW; // flags already set by XOR 2809 } 2810 else 2811 { 2812 cdb.genc2(0xC6,modregrmx(3,0,reg),value); // MOV regL,value 2813 if (reg >= 4 && I64) 2814 { 2815 code_orrex(cdb.last(), REX); 2816 } 2817 } 2818 L2: 2819 if (flags & mPSW) 2820 genregs(cdb,0x84,reg,reg); // TEST regL,regL 2821 2822 if (regm) 2823 // Set just the 'L' part of the register value 2824 regimmed_set(reg,(regv & ~cast(targ_size_t)0xFF) | value); 2825 else if (flags & 16 && reg & 4 && regcon.immed.mval & mask(reg & 3)) 2826 // Set just the 'H' part of the register value 2827 regimmed_set((reg & 3),(regv & ~cast(targ_size_t)0xFF00) | (value << 8)); 2828 return; 2829 } 2830 L3: 2831 if (I16) 2832 value = cast(targ_short) value; // sign-extend MSW 2833 else if (I32) 2834 value = cast(targ_int) value; 2835 2836 if (!I16 && flags & 2) // load 16 bit value 2837 { 2838 value &= 0xFFFF; 2839 if (value == 0) 2840 goto L1; 2841 else 2842 { 2843 if (flags & mPSW) 2844 goto L1; 2845 cdb.genc2(0xC7,modregrmx(3,0,reg),value); // MOV reg,value 2846 cdb.last().Iflags |= CFopsize; // yes, even for I64 2847 if (regm) 2848 // High bits of register are not affected by 16 bit load 2849 regimmed_set(reg,(regv & 
~cast(targ_size_t)0xFFFF) | value); 2850 } 2851 return; 2852 } 2853 L1: 2854 2855 // If we already have the right value in the right register 2856 if (regm && (regv & 0xFFFFFFFF) == (value & 0xFFFFFFFF) && !(flags & 64)) 2857 { 2858 if (flags & mPSW) 2859 gentstreg(cdb,reg); 2860 } 2861 else if (flags & 64 && regm && regv == value) 2862 { // Look at the full 64 bits 2863 if (flags & mPSW) 2864 { 2865 gentstreg(cdb,reg); 2866 code_orrex(cdb.last(), REX_W); 2867 } 2868 } 2869 else 2870 { 2871 if (flags & mPSW) 2872 { 2873 switch (value) 2874 { 2875 case 0: 2876 genregs(cdb,0x31,reg,reg); 2877 break; 2878 2879 case 1: 2880 if (I64) 2881 goto L4; 2882 genregs(cdb,0x31,reg,reg); 2883 goto inc; 2884 2885 case ~cast(targ_size_t)0: 2886 if (I64) 2887 goto L4; 2888 genregs(cdb,0x31,reg,reg); 2889 goto dec; 2890 2891 default: 2892 L4: 2893 if (flags & 64) 2894 { 2895 cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64 2896 gentstreg(cdb,reg); 2897 code_orrex(cdb.last(), REX_W); 2898 } 2899 else 2900 { 2901 value &= 0xFFFFFFFF; 2902 cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value 2903 gentstreg(cdb,reg); 2904 } 2905 break; 2906 } 2907 } 2908 else 2909 { 2910 // Look for single byte conversion 2911 if (regcon.immed.mval & mAX) 2912 { 2913 if (I32) 2914 { 2915 if (reg == AX && value == cast(targ_short) regv) 2916 { 2917 cdb.gen1(0x98); // CWDE 2918 goto done; 2919 } 2920 if (reg == DX && 2921 value == (regcon.immed.value[AX] & 0x80000000 ? 0xFFFFFFFF : 0) && 2922 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium) 2923 ) 2924 { 2925 cdb.gen1(0x99); // CDQ 2926 goto done; 2927 } 2928 } 2929 else if (I16) 2930 { 2931 if (reg == AX && 2932 cast(targ_short) value == cast(byte) regv) 2933 { 2934 cdb.gen1(0x98); // CBW 2935 goto done; 2936 } 2937 2938 if (reg == DX && 2939 cast(targ_short) value == (regcon.immed.value[AX] & 0x8000 ? 
cast(targ_short) 0xFFFF : cast(targ_short) 0) && 2940 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_Pentium) 2941 ) 2942 { 2943 cdb.gen1(0x99); // CWD 2944 goto done; 2945 } 2946 } 2947 } 2948 if (value == 0 && !(flags & 8) && config.target_cpu >= TARGET_80486) 2949 { 2950 genregs(cdb,0x31,reg,reg); // XOR reg,reg 2951 goto done; 2952 } 2953 2954 if (!I64 && regm && !(flags & 8)) 2955 { 2956 if (regv + 1 == value || 2957 // Catch case of (0xFFFF+1 == 0) for 16 bit compiles 2958 (I16 && cast(targ_short)(regv + 1) == cast(targ_short)value)) 2959 { 2960 inc: 2961 cdb.gen1(0x40 + reg); // INC reg 2962 goto done; 2963 } 2964 if (regv - 1 == value) 2965 { 2966 dec: 2967 cdb.gen1(0x48 + reg); // DEC reg 2968 goto done; 2969 } 2970 } 2971 2972 // See if another register has the right value 2973 r = 0; 2974 for (mreg = regcon.immed.mval; mreg; mreg >>= 1) 2975 { 2976 debug 2977 assert(!I16 || regcon.immed.value[r] == cast(targ_short)regcon.immed.value[r]); 2978 2979 if (mreg & 1 && regcon.immed.value[r] == value) 2980 { 2981 genmovreg(cdb,reg,r); 2982 goto done; 2983 } 2984 r++; 2985 } 2986 2987 if (value == 0 && !(flags & 8)) 2988 { 2989 genregs(cdb,0x31,reg,reg); // XOR reg,reg 2990 } 2991 else 2992 { // See if we can just load a byte 2993 if (regm & BYTEREGS && 2994 !(config.flags4 & CFG4speed && config.target_cpu >= TARGET_PentiumPro) 2995 ) 2996 { 2997 if ((regv & ~cast(targ_size_t)0xFF) == (value & ~cast(targ_size_t)0xFF)) 2998 { 2999 movregconst(cdb,reg,value,(flags & 8) |4|1); // load regL 3000 return; 3001 } 3002 if (regm & (mAX|mBX|mCX|mDX) && 3003 (regv & ~cast(targ_size_t)0xFF00) == (value & ~cast(targ_size_t)0xFF00) && 3004 !I64) 3005 { 3006 movregconst(cdb,4|reg,value >> 8,(flags & 8) |4|1|16); // load regH 3007 return; 3008 } 3009 } 3010 if (flags & 64) 3011 cdb.genc2(0xB8 + (reg&7),REX_W << 16 | (reg&8) << 13,value); // MOV reg,value64 3012 else 3013 { 3014 value &= 0xFFFFFFFF; 3015 cdb.genc2(0xB8 + (reg&7),(reg&8) << 13,value); // MOV reg,value 3016 } 3017 } 3018 } 3019 done: 3020 regimmed_set(reg,value); 3021 } 3022 } 3023 3024 /************************** 3025 * Generate a jump instruction. 3026 */ 3027 3028 void genjmp(ref CodeBuilder cdb,opcode_t op,uint fltarg,block *targ) 3029 { 3030 code cs; 3031 cs.Iop = op & 0xFF; 3032 cs.Iflags = 0; 3033 cs.Irex = 0; 3034 if (op != JMP && op != 0xE8) // if not already long branch 3035 cs.Iflags = CFjmp16; // assume long branch for op = 0x7x 3036 cs.IFL2 = cast(ubyte)fltarg; // FLblock (or FLcode) 3037 cs.IEV2.Vblock = targ; // target block (or code) 3038 if (fltarg == FLcode) 3039 (cast(code *)targ).Iflags |= CFtarg; 3040 3041 if (config.flags4 & CFG4fastfloat) // if fast floating point 3042 { 3043 cdb.gen(&cs); 3044 return; 3045 } 3046 3047 switch (op & 0xFF00) // look at second jump opcode 3048 { 3049 // The JP and JNP come from floating point comparisons 3050 case JP << 8: 3051 cdb.gen(&cs); 3052 cs.Iop = JP; 3053 cdb.gen(&cs); 3054 break; 3055 3056 case JNP << 8: 3057 { 3058 // Do a JP around the jump instruction 3059 code *cnop = gennop(null); 3060 genjmp(cdb,JP,FLcode,cast(block *) cnop); 3061 cdb.gen(&cs); 3062 cdb.append(cnop); 3063 break; 3064 } 3065 3066 case 1 << 8: // toggled no jump 3067 case 0 << 8: 3068 cdb.gen(&cs); 3069 break; 3070 3071 default: 3072 debug 3073 printf("jop = x%x\n",op); 3074 assert(0); 3075 } 3076 } 3077 3078 /********************************************* 3079 * Generate first part of prolog for interrupt function. 
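 * A sketch of what the opcode tables below expand to (80286 and up uses the
 * ops2 table, older CPUs push each register individually via ops0):
 *      PUSHA                   (0x60)
 *      PUSH DS                 (0x1E)
 *      PUSH ES                 (0x06)
 *      MOV  BP,SP
 *      SUB  SP,localsize       (only if localsize != 0)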
3080 */ 3081 void prolog_ifunc(ref CodeBuilder cdb, tym_t* tyf) 3082 { 3083 static immutable ubyte[4] ops2 = [ 0x60,0x1E,0x06,0 ]; 3084 static immutable ubyte[11] ops0 = [ 0x50,0x51,0x52,0x53, 3085 0x54,0x55,0x56,0x57, 3086 0x1E,0x06,0 ]; 3087 3088 immutable(ubyte)* p = (config.target_cpu >= TARGET_80286) ? ops2.ptr : ops0.ptr; 3089 do 3090 cdb.gen1(*p); 3091 while (*++p); 3092 3093 genregs(cdb,0x8B,BP,SP); // MOV BP,SP 3094 if (localsize) 3095 cod3_stackadj(cdb, cast(int)localsize); 3096 3097 *tyf |= mTYloadds; 3098 } 3099 3100 void prolog_ifunc2(ref CodeBuilder cdb, tym_t tyf, tym_t tym, bool pushds) 3101 { 3102 /* Determine if we need to reload DS */ 3103 if (tyf & mTYloadds) 3104 { 3105 if (!pushds) // if not already pushed 3106 cdb.gen1(0x1E); // PUSH DS 3107 spoff += _tysize[TYint]; 3108 cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0); // MOV AX,DGROUP 3109 code *c = cdb.last(); 3110 c.IEV2.Vseg = DATA; 3111 c.Iflags ^= CFseg | CFoff; // turn off CFoff, on CFseg 3112 cdb.gen2(0x8E,modregrm(3,3,AX)); // MOV DS,AX 3113 useregs(mAX); 3114 } 3115 3116 if (tym == TYifunc) 3117 cdb.gen1(0xFC); // CLD 3118 } 3119 3120 void prolog_16bit_windows_farfunc(ref CodeBuilder cdb, tym_t* tyf, bool* pushds) 3121 { 3122 int wflags = config.wflags; 3123 if (wflags & WFreduced && !(*tyf & mTYexport)) 3124 { // reduced prolog/epilog for non-exported functions 3125 wflags &= ~(WFdgroup | WFds | WFss); 3126 } 3127 3128 getregsNoSave(mAX); // should not have any value in AX 3129 3130 int segreg; 3131 switch (wflags & (WFdgroup | WFds | WFss)) 3132 { 3133 case WFdgroup: // MOV AX,DGROUP 3134 { 3135 if (wflags & WFreduced) 3136 *tyf &= ~mTYloadds; // remove redundancy 3137 cdb.genc(0xC7,modregrm(3,0,AX),0,0,FLdatseg,cast(targ_uns) 0); 3138 code *c = cdb.last(); 3139 c.IEV2.Vseg = DATA; 3140 c.Iflags ^= CFseg | CFoff; // turn off CFoff, on CFseg 3141 break; 3142 } 3143 3144 case WFss: 3145 segreg = 2; // SS 3146 goto Lmovax; 3147 3148 case WFds: 3149 segreg = 3; // DS 3150 Lmovax: 3151 cdb.gen2(0x8C,modregrm(3,segreg,AX)); // MOV AX,segreg 3152 if (wflags & WFds) 3153 cdb.gen1(0x90); // NOP 3154 break; 3155 3156 case 0: 3157 break; 3158 3159 default: 3160 debug 3161 printf("config.wflags = x%x\n",config.wflags); 3162 assert(0); 3163 } 3164 if (wflags & WFincbp) 3165 cdb.gen1(0x40 + BP); // INC BP 3166 cdb.gen1(0x50 + BP); // PUSH BP 3167 genregs(cdb,0x8B,BP,SP); // MOV BP,SP 3168 if (wflags & (WFsaveds | WFds | WFss | WFdgroup)) 3169 { 3170 cdb.gen1(0x1E); // PUSH DS 3171 *pushds = true; 3172 BPoff = -REGSIZE; 3173 } 3174 if (wflags & (WFds | WFss | WFdgroup)) 3175 cdb.gen2(0x8E,modregrm(3,3,AX)); // MOV DS,AX 3176 } 3177 3178 /********************************************** 3179 * Set up frame register. 3180 * Input: 3181 * *xlocalsize amount of local variables 3182 * Output: 3183 * *enter set to true if ENTER instruction can be used, false otherwise 3184 * *xlocalsize amount to be subtracted from stack pointer 3185 * *cfa_offset the frame pointer's offset from the CFA 3186 * Returns: 3187 * generated code 3188 */ 3189 3190 void prolog_frame(ref CodeBuilder cdb, uint farfunc, uint* xlocalsize, bool* enter, int* cfa_offset) 3191 { 3192 //printf("prolog_frame\n"); 3193 *cfa_offset = 0; 3194 3195 if (0 && config.exe == EX_WIN64) 3196 { 3197 // PUSH RBP 3198 // LEA RBP,0[RSP] 3199 cdb. 
gen1(0x50 + BP); 3200 cdb.genc1(LEA,(REX_W<<16) | (modregrm(0,4,SP)<<8) | modregrm(2,BP,4),FLconst,0); 3201 *enter = false; 3202 return; 3203 } 3204 3205 if (config.wflags & WFincbp && farfunc) 3206 cdb.gen1(0x40 + BP); // INC BP 3207 if (config.target_cpu < TARGET_80286 || 3208 config.exe & (EX_LINUX | EX_LINUX64 | EX_OSX | EX_OSX64 | EX_FREEBSD | EX_FREEBSD64 | EX_DRAGONFLYBSD64 | EX_SOLARIS | EX_SOLARIS64 | EX_WIN64) || 3209 !localsize || 3210 config.flags & CFGstack || 3211 (*xlocalsize >= 0x1000 && config.exe & EX_flat) || 3212 localsize >= 0x10000 || 3213 (NTEXCEPTIONS == 2 && 3214 (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH))) || 3215 (config.target_cpu >= TARGET_80386 && 3216 config.flags4 & CFG4speed) 3217 ) 3218 { 3219 cdb.gen1(0x50 + BP); // PUSH BP 3220 genregs(cdb,0x8B,BP,SP); // MOV BP,SP 3221 if (I64) 3222 code_orrex(cdb.last(), REX_W); // MOV RBP,RSP 3223 if ((config.objfmt & (OBJ_ELF | OBJ_MACH)) && config.fulltypes) 3224 // Don't reorder instructions, as dwarf CFA relies on it 3225 code_orflag(cdb.last(), CFvolatile); 3226 static if (NTEXCEPTIONS == 2) 3227 { 3228 if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.ehmethod == EHmethod.EH_WIN32 && !(funcsym_p.Sfunc.Fflags3 & Feh_none) || config.ehmethod == EHmethod.EH_SEH)) 3229 { 3230 nteh_prolog(cdb); 3231 int sz = nteh_contextsym_size(); 3232 assert(sz != 0); // should be 5*4, not 0 3233 *xlocalsize -= sz; // sz is already subtracted from ESP 3234 // by nteh_prolog() 3235 } 3236 } 3237 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D || 3238 config.ehmethod == EHmethod.EH_DWARF) 3239 { 3240 int off = 2 * REGSIZE; // 1 for the return address + 1 for the PUSH EBP 3241 dwarf_CFA_set_loc(1); // address after PUSH EBP 3242 dwarf_CFA_set_reg_offset(SP, off); // CFA is now 8[ESP] 3243 dwarf_CFA_offset(BP, -off); // EBP is at 0[ESP] 3244 dwarf_CFA_set_loc(I64 ? 4 : 3); // address after MOV EBP,ESP 3245 /* Oddly, the CFA is not the same as the frame pointer, 3246 * which is why the offset of BP is set to 8 3247 */ 3248 dwarf_CFA_set_reg_offset(BP, off); // CFA is now 0[EBP] 3249 *cfa_offset = off; // remember the difference between the CFA and the frame pointer 3250 } 3251 *enter = false; /* do not use ENTER instruction */ 3252 } 3253 else 3254 *enter = true; 3255 } 3256 3257 /********************************************** 3258 * Enforce stack alignment. 3259 * Input: 3260 * cdb code builder. 3261 * Returns: 3262 * generated code 3263 */ 3264 void prolog_stackalign(ref CodeBuilder cdb) 3265 { 3266 if (!enforcealign) 3267 return; 3268 3269 const offset = (hasframe ? 2 : 1) * REGSIZE; // 1 for the return address + 1 for the PUSH EBP 3270 if (offset & (STACKALIGN - 1) || TARGET_STACKALIGN < STACKALIGN) 3271 cod3_stackalign(cdb, STACKALIGN); 3272 } 3273 3274 void prolog_frameadj(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool enter, bool* pushalloc) 3275 { 3276 uint pushallocreg = (tyf == TYmfunc) ? 
CX : AX; 3277 static if (TARGET_LINUX) 3278 { 3279 bool check = false; // seems that Linux doesn't need to fault in stack pages 3280 } 3281 else 3282 { 3283 bool check = (config.flags & CFGstack && !(I32 && xlocalsize < 0x1000)) // if stack overflow check 3284 || (TARGET_WINDOS && xlocalsize >= 0x1000 && config.exe & EX_flat); 3285 } 3286 if (check) 3287 { 3288 if (I16) 3289 { 3290 // BUG: Won't work if parameter is passed in AX 3291 movregconst(cdb,AX,xlocalsize,false); // MOV AX,localsize 3292 makeitextern(getRtlsym(RTLSYM_CHKSTK)); 3293 // CALL _chkstk 3294 cdb.gencs((LARGECODE) ? 0x9A : CALL,0,FLfunc,getRtlsym(RTLSYM_CHKSTK)); 3295 useregs((ALLREGS | mBP | mES) & ~getRtlsym(RTLSYM_CHKSTK).Sregsaved); 3296 } 3297 else 3298 { 3299 /* Watch out for 64 bit code where EDX is passed as a register parameter 3300 */ 3301 reg_t reg = I64 ? R11 : DX; // scratch register 3302 3303 /* MOV EDX, xlocalsize/0x1000 3304 * L1: SUB ESP, 0x1000 3305 * TEST [ESP],ESP 3306 * DEC EDX 3307 * JNE L1 3308 * SUB ESP, xlocalsize % 0x1000 3309 */ 3310 movregconst(cdb, reg, xlocalsize / 0x1000, false); 3311 cod3_stackadj(cdb, 0x1000); 3312 code_orflag(cdb.last(), CFtarg2); 3313 cdb.gen2sib(0x85, modregrm(0,SP,4),modregrm(0,4,SP)); 3314 if (I64) 3315 { cdb.gen2(0xFF, modregrmx(3,1,R11)); // DEC R11D 3316 cdb.genc2(JNE,0,cast(targ_uns)-15); 3317 } 3318 else 3319 { cdb.gen1(0x48 + DX); // DEC EDX 3320 cdb.genc2(JNE,0,cast(targ_uns)-12); 3321 } 3322 regimmed_set(reg,0); // reg is now 0 3323 cod3_stackadj(cdb, xlocalsize & 0xFFF); 3324 useregs(mask(reg)); 3325 } 3326 } 3327 else 3328 { 3329 if (enter) 3330 { // ENTER xlocalsize,0 3331 cdb.genc(0xC8,0,FLconst,xlocalsize,FLconst,cast(targ_uns) 0); 3332 assert(!(config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D)); // didn't emit Dwarf data 3333 } 3334 else if (xlocalsize == REGSIZE && config.flags4 & CFG4optimized) 3335 { 3336 cdb. gen1(0x50 + pushallocreg); // PUSH AX 3337 // Do this to prevent an -x[EBP] to be moved in 3338 // front of the push. 3339 code_orflag(cdb.last(),CFvolatile); 3340 *pushalloc = true; 3341 } 3342 else 3343 cod3_stackadj(cdb, xlocalsize); 3344 } 3345 } 3346 3347 void prolog_frameadj2(ref CodeBuilder cdb, tym_t tyf, uint xlocalsize, bool* pushalloc) 3348 { 3349 uint pushallocreg = (tyf == TYmfunc) ? CX : AX; 3350 if (xlocalsize == REGSIZE) 3351 { 3352 cdb.gen1(0x50 + pushallocreg); // PUSH AX 3353 *pushalloc = true; 3354 } 3355 else if (xlocalsize == 2 * REGSIZE) 3356 { 3357 cdb.gen1(0x50 + pushallocreg); // PUSH AX 3358 cdb.gen1(0x50 + pushallocreg); // PUSH AX 3359 *pushalloc = true; 3360 } 3361 else 3362 cod3_stackadj(cdb, xlocalsize); 3363 } 3364 3365 void prolog_setupalloca(ref CodeBuilder cdb) 3366 { 3367 //printf("prolog_setupalloca() offset x%x size x%x alignment x%x\n", 3368 //cast(int)Alloca.offset, cast(int)Alloca.size, cast(int)Alloca.alignment); 3369 // Set up magic parameter for alloca() 3370 // MOV -REGSIZE[BP],localsize - BPoff 3371 cdb.genc(0xC7,modregrm(2,0,BPRM), 3372 FLconst,Alloca.offset + BPoff, 3373 FLconst,localsize - BPoff); 3374 if (I64) 3375 code_orrex(cdb.last(), REX_W); 3376 } 3377 3378 /************************************** 3379 * Save registers that the function destroys, 3380 * but that the ABI says should be preserved across 3381 * function calls. 3382 * 3383 * Emit Dwarf info for these saves. 
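 *
 * Two strategies are used (a sketch; the body is authoritative):
 * - pushoffuse: store into a preallocated save area in the frame, e.g.
 *      MOV gpoffset[EBP],reg   or   MOVUPD xmmoffset[EBP],xmm
 * - otherwise: PUSH reg for general purpose registers, and
 *      SUB RSP,16  then  MOVUPD 0[RSP],xmm   for XMM registers.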
3384 * Params: 3385 * cdb = append generated instructions to this 3386 * topush = mask of registers to push 3387 * cfa_offset = offset of frame pointer from CFA 3388 */ 3389 3390 void prolog_saveregs(ref CodeBuilder cdb, regm_t topush, int cfa_offset) 3391 { 3392 if (pushoffuse) 3393 { 3394 // Save to preallocated section in the stack frame 3395 int xmmtopush = numbitsset(topush & XMMREGS); // XMM regs take 16 bytes 3396 int gptopush = numbitsset(topush) - xmmtopush; // general purpose registers to save 3397 targ_size_t xmmoffset = pushoff + BPoff; 3398 if (!hasframe || enforcealign) 3399 xmmoffset += EBPtoESP; 3400 targ_size_t gpoffset = xmmoffset + xmmtopush * 16; 3401 while (topush) 3402 { 3403 reg_t reg = findreg(topush); 3404 topush &= ~mask(reg); 3405 if (isXMMreg(reg)) 3406 { 3407 if (hasframe && !enforcealign) 3408 { 3409 // MOVUPD xmmoffset[EBP],xmm 3410 cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset); 3411 } 3412 else 3413 { 3414 // MOVUPD xmmoffset[ESP],xmm 3415 cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset); 3416 } 3417 xmmoffset += 16; 3418 } 3419 else 3420 { 3421 if (hasframe && !enforcealign) 3422 { 3423 // MOV gpoffset[EBP],reg 3424 cdb.genc1(0x89,modregxrm(2,reg,BPRM),FLconst,gpoffset); 3425 } 3426 else 3427 { 3428 // MOV gpoffset[ESP],reg 3429 cdb.genc1(0x89,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset); 3430 } 3431 if (I64) 3432 code_orrex(cdb.last(), REX_W); 3433 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D || 3434 config.ehmethod == EHmethod.EH_DWARF) 3435 { // Emit debug_frame data giving location of saved register 3436 code *c = cdb.finish(); 3437 pinholeopt(c, null); 3438 dwarf_CFA_set_loc(calcblksize(c)); // address after save 3439 dwarf_CFA_offset(reg, cast(int)(gpoffset - cfa_offset)); 3440 cdb.reset(); 3441 cdb.append(c); 3442 } 3443 gpoffset += REGSIZE; 3444 } 3445 } 3446 } 3447 else 3448 { 3449 while (topush) /* while registers to push */ 3450 { 3451 reg_t reg = findreg(topush); 3452 topush &= ~mask(reg); 3453 if (isXMMreg(reg)) 3454 { 3455 // SUB RSP,16 3456 cod3_stackadj(cdb, 16); 3457 // MOVUPD 0[RSP],xmm 3458 cdb.genc1(STOUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0); 3459 EBPtoESP += 16; 3460 spoff += 16; 3461 } 3462 else 3463 { 3464 genpush(cdb, reg); 3465 EBPtoESP += REGSIZE; 3466 spoff += REGSIZE; 3467 if (config.fulltypes == CVDWARF_C || config.fulltypes == CVDWARF_D || 3468 config.ehmethod == EHmethod.EH_DWARF) 3469 { // Emit debug_frame data giving location of saved register 3470 // relative to 0[EBP] 3471 code *c = cdb.finish(); 3472 pinholeopt(c, null); 3473 dwarf_CFA_set_loc(calcblksize(c)); // address after PUSH reg 3474 dwarf_CFA_offset(reg, -EBPtoESP - cfa_offset); 3475 cdb.reset(); 3476 cdb.append(c); 3477 } 3478 } 3479 } 3480 } 3481 } 3482 3483 /************************************** 3484 * Undo prolog_saveregs() 3485 */ 3486 3487 private void epilog_restoreregs(ref CodeBuilder cdb, regm_t topop) 3488 { 3489 debug 3490 if (topop & ~(XMMREGS | 0xFFFF)) 3491 printf("fregsaved = %s, mfuncreg = %s\n",regm_str(fregsaved),regm_str(mfuncreg)); 3492 3493 assert(!(topop & ~(XMMREGS | 0xFFFF))); 3494 if (pushoffuse) 3495 { 3496 // Save to preallocated section in the stack frame 3497 int xmmtopop = numbitsset(topop & XMMREGS); // XMM regs take 16 bytes 3498 int gptopop = numbitsset(topop) - xmmtopop; // general purpose registers to save 3499 targ_size_t xmmoffset = pushoff + BPoff; 3500 if (!hasframe || enforcealign) 3501 xmmoffset += EBPtoESP; 3502 
targ_size_t gpoffset = xmmoffset + xmmtopop * 16; 3503 while (topop) 3504 { 3505 reg_t reg = findreg(topop); 3506 topop &= ~mask(reg); 3507 if (isXMMreg(reg)) 3508 { 3509 if (hasframe && !enforcealign) 3510 { 3511 // MOVUPD xmm,xmmoffset[EBP] 3512 cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,BPRM),FLconst,xmmoffset); 3513 } 3514 else 3515 { 3516 // MOVUPD xmm,xmmoffset[ESP] 3517 cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,xmmoffset); 3518 } 3519 xmmoffset += 16; 3520 } 3521 else 3522 { 3523 if (hasframe && !enforcealign) 3524 { 3525 // MOV reg,gpoffset[EBP] 3526 cdb.genc1(0x8B,modregxrm(2,reg,BPRM),FLconst,gpoffset); 3527 } 3528 else 3529 { 3530 // MOV reg,gpoffset[ESP] 3531 cdb.genc1(0x8B,modregxrm(2,reg,4) + 256*modregrm(0,4,SP),FLconst,gpoffset); 3532 } 3533 if (I64) 3534 code_orrex(cdb.last(), REX_W); 3535 gpoffset += REGSIZE; 3536 } 3537 } 3538 } 3539 else 3540 { 3541 reg_t reg = I64 ? XMM7 : DI; 3542 if (!(topop & XMMREGS)) 3543 reg = R15; 3544 regm_t regm = 1 << reg; 3545 3546 while (topop) 3547 { if (topop & regm) 3548 { 3549 if (isXMMreg(reg)) 3550 { 3551 // MOVUPD xmm,0[RSP] 3552 cdb.genc1(LODUPD,modregxrm(2,reg-XMM0,4) + 256*modregrm(0,4,SP),FLconst,0); 3553 // ADD RSP,16 3554 cod3_stackadj(cdb, -16); 3555 } 3556 else 3557 { 3558 cdb.gen1(0x58 + (reg & 7)); // POP reg 3559 if (reg & 8) 3560 code_orrex(cdb.last(), REX_B); 3561 } 3562 topop &= ~regm; 3563 } 3564 regm >>= 1; 3565 reg--; 3566 } 3567 } 3568 } 3569 3570 version (SCPP) 3571 { 3572 void prolog_trace(ref CodeBuilder cdb, bool farfunc, uint* regsaved) 3573 { 3574 Symbol *s = getRtlsym(farfunc ? RTLSYM_TRACE_PRO_F : RTLSYM_TRACE_PRO_N); 3575 makeitextern(s); 3576 cdb.gencs(I16 ? 0x9A : CALL,0,FLfunc,s); // CALL _trace 3577 if (!I16) 3578 code_orflag(cdb.last(),CFoff | CFselfrel); 3579 /* Embedding the function name inline after the call works, but it 3580 * makes disassembling the code annoying. 3581 */ 3582 static if (ELFOBJ || MACHOBJ) 3583 { 3584 // Generate length prefixed name that is recognized by profiler 3585 size_t len = strlen(funcsym_p.Sident); 3586 char *buffer = cast(char *)malloc(len + 4); 3587 assert(buffer); 3588 if (len <= 254) 3589 { 3590 buffer[0] = len; 3591 memcpy(buffer + 1, funcsym_p.Sident, len); 3592 len++; 3593 } 3594 else 3595 { 3596 buffer[0] = 0xFF; 3597 buffer[1] = 0; 3598 buffer[2] = len & 0xFF; 3599 buffer[3] = len >> 8; 3600 memcpy(buffer + 4, funcsym_p.Sident, len); 3601 len += 4; 3602 } 3603 cdb.genasm(buffer, len); // append func name 3604 free(buffer); 3605 } 3606 else 3607 { 3608 char [IDMAX+IDOHD+1] name = void; 3609 size_t len = objmod.mangle(funcsym_p,name.ptr); 3610 assert(len < name.length); 3611 cdb.genasm(name.ptr,len); // append func name 3612 } 3613 *regsaved = s.Sregsaved; 3614 } 3615 } 3616 3617 /****************************** 3618 * Generate special varargs prolog for Posix 64 bit systems. 3619 * Params: 3620 * cdb = sink for generated code 3621 * sv = symbol for __va_argsave 3622 * namedargs = registers that named parameters (not ... arguments) were passed in. 3623 */ 3624 void prolog_genvarargs(ref CodeBuilder cdb, Symbol* sv, regm_t namedargs) 3625 { 3626 /* Generate code to move any arguments passed in registers into 3627 * the stack variable __va_argsave, 3628 * so we can reference it via pointers through va_arg(). 
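 * As a worked check on the magic constant used at the end of this function:
 * reg_args lives at offset 6*8 + 8*16 + 4 + 4 + 8 = 192 within the struct
 * sketched below, which is exactly the displacement in the final MOV.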
3629 * struct __va_argsave_t { 3630 * size_t[6] regs; 3631 * real[8] fpregs; 3632 * uint offset_regs; 3633 * uint offset_fpregs; 3634 * void* stack_args; 3635 * void* reg_args; 3636 * } 3637 * The MOVAPS instructions seg fault if data is not aligned on 3638 * 16 bytes, so this gives us a nice check to ensure no mistakes. 3639 MOV voff+0*8[RBP],EDI 3640 MOV voff+1*8[RBP],ESI 3641 MOV voff+2*8[RBP],RDX 3642 MOV voff+3*8[RBP],RCX 3643 MOV voff+4*8[RBP],R8 3644 MOV voff+5*8[RBP],R9 3645 MOVZX EAX,AL // AL = 0..8, # of XMM registers used 3646 SHL EAX,2 // 4 bytes for each MOVAPS 3647 LEA R11,offset L2[RIP] 3648 SUB R11,RAX 3649 LEA RAX,voff+6*8+0x7F[RBP] 3650 JMP R11d 3651 MOVAPS -0x0F[RAX],XMM7 // only save XMM registers if actually used 3652 MOVAPS -0x1F[RAX],XMM6 3653 MOVAPS -0x2F[RAX],XMM5 3654 MOVAPS -0x3F[RAX],XMM4 3655 MOVAPS -0x4F[RAX],XMM3 3656 MOVAPS -0x5F[RAX],XMM2 3657 MOVAPS -0x6F[RAX],XMM1 3658 MOVAPS -0x7F[RAX],XMM0 3659 L2: 3660 MOV 1[RAX],offset_regs // set __va_argsave.offset_regs 3661 MOV 5[RAX],offset_fpregs // set __va_argsave.offset_fpregs 3662 LEA R11, Para.size+Para.offset[RBP] 3663 MOV 9[RAX],R11 // set __va_argsave.stack_args 3664 SUB RAX,6*8+0x7F // point to start of __va_argsave 3665 MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args 3666 * RAX and R11 are destroyed. 3667 */ 3668 3669 /* Save registers into the voff area on the stack 3670 */ 3671 targ_size_t voff = Auto.size + BPoff + sv.Soffset; // EBP offset of start of sv 3672 const int vregnum = 6; 3673 const uint vsize = vregnum * 8 + 8 * 16; 3674 3675 static immutable ubyte[vregnum] regs = [ DI,SI,DX,CX,R8,R9 ]; 3676 3677 if (!hasframe || enforcealign) 3678 voff += EBPtoESP; 3679 3680 for (int i = 0; i < vregnum; i++) 3681 { 3682 uint r = regs[i]; 3683 if (!(mask(r) & namedargs)) // unnamed arguments would be the ... ones 3684 { 3685 uint ea = (REX_W << 16) | modregxrm(2,r,BPRM); 3686 if (!hasframe || enforcealign) 3687 ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,r,4); 3688 cdb.genc1(0x89,ea,FLconst,voff + i*8); 3689 } 3690 } 3691 3692 genregs(cdb,0x0FB6,AX,AX); // MOVZX EAX,AL 3693 cdb.genc2(0xC1,modregrm(3,4,AX),2); // SHL EAX,2 3694 int raxoff = cast(int)(voff+6*8+0x7F); 3695 uint L2offset = (raxoff < -0x7F) ? 
0x2D : 0x2A; 3696 if (!hasframe || enforcealign) 3697 L2offset += 1; // +1 for sib byte 3698 // LEA R11,offset L2[RIP] 3699 cdb.genc1(LEA,(REX_W << 16) | modregxrm(0,R11,5),FLconst,L2offset); 3700 genregs(cdb,0x29,AX,R11); // SUB R11,RAX 3701 code_orrex(cdb.last(), REX_W); 3702 // LEA RAX,voff+vsize-6*8-16+0x7F[RBP] 3703 uint ea = (REX_W << 16) | modregrm(2,AX,BPRM); 3704 if (!hasframe || enforcealign) 3705 // add sib byte for [RSP] addressing 3706 ea = (REX_W << 16) | (modregrm(0,4,SP) << 8) | modregxrm(2,AX,4); 3707 cdb.genc1(LEA,ea,FLconst,raxoff); 3708 cdb.gen2(0xFF,modregrmx(3,4,R11)); // JMP R11d 3709 for (int i = 0; i < 8; i++) 3710 { 3711 // MOVAPS -15-16*i[RAX],XMM7-i 3712 cdb.genc1(0x0F29,modregrm(0,XMM7-i,0),FLconst,-15-16*i); 3713 } 3714 3715 /* Compute offset_regs and offset_fpregs 3716 */ 3717 uint offset_regs = 0; 3718 uint offset_fpregs = vregnum * 8; 3719 for (int i = AX; i <= XMM7; i++) 3720 { 3721 regm_t m = mask(i); 3722 if (m & namedargs) 3723 { 3724 if (m & (mDI|mSI|mDX|mCX|mR8|mR9)) 3725 offset_regs += 8; 3726 else if (m & XMMREGS) 3727 offset_fpregs += 16; 3728 namedargs &= ~m; 3729 if (!namedargs) 3730 break; 3731 } 3732 } 3733 // MOV 1[RAX],offset_regs 3734 cdb.genc(0xC7,modregrm(2,0,AX),FLconst,1,FLconst,offset_regs); 3735 3736 // MOV 5[RAX],offset_fpregs 3737 cdb.genc(0xC7,modregrm(2,0,AX),FLconst,5,FLconst,offset_fpregs); 3738 3739 // LEA R11, Para.size+Para.offset[RBP] 3740 ea = modregxrm(2,R11,BPRM); 3741 if (!hasframe) 3742 ea = (modregrm(0,4,SP) << 8) | modregrm(2,DX,4); 3743 Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1); 3744 cdb.genc1(LEA,(REX_W << 16) | ea,FLconst,Para.size + Para.offset); 3745 3746 // MOV 9[RAX],R11 3747 cdb.genc1(0x89,(REX_W << 16) | modregxrm(2,R11,AX),FLconst,9); 3748 3749 // SUB RAX,6*8+0x7F // point to start of __va_argsave 3750 cdb.genc2(0x2D,0,6*8+0x7F); 3751 code_orrex(cdb.last(), REX_W); 3752 3753 // MOV 6*8+8*16+4+4+8[RAX],RAX // set __va_argsave.reg_args 3754 cdb.genc1(0x89,(REX_W << 16) | modregrm(2,AX,AX),FLconst,6*8+8*16+4+4+8); 3755 3756 pinholeopt(cdb.peek(), null); 3757 useregs(mAX|mR11); 3758 } 3759 3760 void prolog_gen_win64_varargs(ref CodeBuilder cdb) 3761 { 3762 /* The Microsoft scheme. 3763 * http://msdn.microsoft.com/en-US/library/dd2wa36c(v=vs.80) 3764 * Copy registers onto stack. 3765 mov 8[RSP],RCX 3766 mov 010h[RSP],RDX 3767 mov 018h[RSP],R8 3768 mov 020h[RSP],R9 3769 */ 3770 } 3771 3772 /************************************ 3773 * Params: 3774 * cdb = generated code sink 3775 * tf = what's the type of the function 3776 * pushalloc = use PUSH to allocate on the stack rather than subtracting from SP 3777 * namedargs = set to the registers that named parameters were passed in 3778 */ 3779 void prolog_loadparams(ref CodeBuilder cdb, tym_t tyf, bool pushalloc, out regm_t namedargs) 3780 { 3781 //printf("prolog_loadparams()\n"); 3782 debug 3783 for (SYMIDX si = 0; si < globsym.top; si++) 3784 { 3785 Symbol *s = globsym.tab[si]; 3786 if (debugr && (s.Sclass == SCfastpar || s.Sclass == SCshadowreg)) 3787 { 3788 printf("symbol '%s' is fastpar in register [%s,%s]\n", s.Sident.ptr, 3789 regm_str(mask(s.Spreg)), 3790 (s.Spreg2 == NOREG ? "NOREG" : regm_str(mask(s.Spreg2)))); 3791 if (s.Sfl == FLreg) 3792 printf("\tassigned to register %s\n", regm_str(mask(s.Sreglsw))); 3793 } 3794 } 3795 3796 uint pushallocreg = (tyf == TYmfunc) ? CX : AX; 3797 3798 /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were not assigned 3799 * registers into their stack locations. 
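 * For example (illustrative only), a parameter that arrived in a register,
 * say RCX, but did not get a register of its own ends up stored with
 *      MOV offset[RBP],RCX
 * (or an ESP relative form when there is no frame), where offset is computed
 * from Fast.size or Para.size, BPoff and Soffset below.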
3800 */ 3801 regm_t shadowregm = 0; 3802 for (SYMIDX si = 0; si < globsym.top; si++) 3803 { 3804 Symbol *s = globsym.tab[si]; 3805 uint sz = cast(uint)type_size(s.Stype); 3806 3807 if ((s.Sclass == SCfastpar || s.Sclass == SCshadowreg) && s.Sfl != FLreg) 3808 { // Argument is passed in a register 3809 3810 type *t = s.Stype; 3811 type *t2 = null; 3812 3813 tym_t tyb = tybasic(t.Tty); 3814 3815 // This logic is same as FuncParamRegs_alloc function at src/dmd/backend/cod1.d 3816 // 3817 // Treat array of 1 the same as its element type 3818 // (Don't put volatile parameters in registers) 3819 if (tyb == TYarray && t.Tdim == 1 && !(t.Tty & mTYvolatile)) 3820 { 3821 t = t.Tnext; 3822 tyb = tybasic(t.Tty); 3823 } 3824 3825 // If struct just wraps another type 3826 if (tyb == TYstruct) 3827 { 3828 // On windows 64 bits, structs occupy a general purpose register, 3829 // regardless of the struct size or the number & types of its fields. 3830 if (config.exe != EX_WIN64) 3831 { 3832 type *targ1 = t.Ttag.Sstruct.Sarg1type; 3833 t2 = t.Ttag.Sstruct.Sarg2type; 3834 if (targ1) 3835 t = targ1; 3836 } 3837 } 3838 3839 if (Symbol_Sisdead(s, anyiasm)) 3840 { 3841 // Ignore it, as it is never referenced 3842 } 3843 else 3844 { 3845 targ_size_t offset = Fast.size + BPoff; 3846 if (s.Sclass == SCshadowreg) 3847 offset = Para.size; 3848 offset += s.Soffset; 3849 if (!hasframe || (enforcealign && s.Sclass != SCshadowreg)) 3850 offset += EBPtoESP; 3851 3852 reg_t preg = s.Spreg; 3853 for (int i = 0; i < 2; ++i) // twice, once for each possible parameter register 3854 { 3855 shadowregm |= mask(preg); 3856 opcode_t op = 0x89; // MOV x[EBP],preg 3857 if (isXMMreg(preg)) 3858 op = xmmstore(t.Tty); 3859 if (!(pushalloc && preg == pushallocreg) || s.Sclass == SCshadowreg) 3860 { 3861 if (hasframe && (!enforcealign || s.Sclass == SCshadowreg)) 3862 { 3863 // MOV x[EBP],preg 3864 cdb.genc1(op,modregxrm(2,preg,BPRM),FLconst,offset); 3865 if (isXMMreg(preg)) 3866 { 3867 checkSetVex(cdb.last(), t.Tty); 3868 } 3869 else 3870 { 3871 //printf("%s Fast.size = %d, BPoff = %d, Soffset = %d, sz = %d\n", 3872 // s.Sident, (int)Fast.size, (int)BPoff, (int)s.Soffset, (int)sz); 3873 if (I64 && sz > 4) 3874 code_orrex(cdb.last(), REX_W); 3875 } 3876 } 3877 else 3878 { 3879 // MOV offset[ESP],preg 3880 // BUG: byte size? 3881 cdb.genc1(op, 3882 (modregrm(0,4,SP) << 8) | 3883 modregxrm(2,preg,4),FLconst,offset); 3884 if (isXMMreg(preg)) 3885 { 3886 checkSetVex(cdb.last(), t.Tty); 3887 } 3888 else 3889 { 3890 if (I64 && sz > 4) 3891 cdb.last().Irex |= REX_W; 3892 } 3893 } 3894 } 3895 preg = s.Spreg2; 3896 if (preg == NOREG) 3897 break; 3898 if (t2) 3899 t = t2; 3900 offset += REGSIZE; 3901 } 3902 } 3903 } 3904 } 3905 3906 if (config.exe == EX_WIN64 && variadic(funcsym_p.Stype)) 3907 { 3908 /* The Microsoft scheme. 3909 * http://msdn.microsoft.com/en-US/library/dd2wa36c(v=vs.80) 3910 * Copy registers onto stack. 
3911 mov 8[RSP],RCX or XMM0 3912 mov 010h[RSP],RDX or XMM1 3913 mov 018h[RSP],R8 or XMM2 3914 mov 020h[RSP],R9 or XMM3 3915 */ 3916 static immutable reg_t[4] vregs = [ CX,DX,R8,R9 ]; 3917 for (int i = 0; i < vregs.length; ++i) 3918 { 3919 uint preg = vregs[i]; 3920 uint offset = cast(uint)(Para.size + i * REGSIZE); 3921 if (!(shadowregm & (mask(preg) | mask(XMM0 + i)))) 3922 { 3923 if (hasframe) 3924 { 3925 // MOV x[EBP],preg 3926 cdb.genc1(0x89, 3927 modregxrm(2,preg,BPRM),FLconst, offset); 3928 code_orrex(cdb.last(), REX_W); 3929 } 3930 else 3931 { 3932 // MOV offset[ESP],preg 3933 cdb.genc1(0x89, 3934 (modregrm(0,4,SP) << 8) | 3935 modregxrm(2,preg,4),FLconst,offset + EBPtoESP); 3936 } 3937 cdb.last().Irex |= REX_W; 3938 } 3939 } 3940 } 3941 3942 /* Copy SCfastpar and SCshadowreg (parameters passed in registers) that were assigned registers 3943 * into their assigned registers. 3944 * Note that we have a big problem if Pa is passed in R1 and assigned to R2, 3945 * and Pb is passed in R2 but assigned to R1. Detect it and assert. 3946 */ 3947 regm_t assignregs = 0; 3948 for (SYMIDX si = 0; si < globsym.top; si++) 3949 { 3950 Symbol *s = globsym.tab[si]; 3951 uint sz = cast(uint)type_size(s.Stype); 3952 3953 if (s.Sclass == SCfastpar || s.Sclass == SCshadowreg) 3954 namedargs |= s.Spregm(); 3955 3956 if ((s.Sclass == SCfastpar || s.Sclass == SCshadowreg) && s.Sfl == FLreg) 3957 { // Argument is passed in a register 3958 3959 type *t = s.Stype; 3960 type *t2 = null; 3961 if (tybasic(t.Tty) == TYstruct && config.exe != EX_WIN64) 3962 { type *targ1 = t.Ttag.Sstruct.Sarg1type; 3963 t2 = t.Ttag.Sstruct.Sarg2type; 3964 if (targ1) 3965 t = targ1; 3966 } 3967 3968 reg_t preg = s.Spreg; 3969 reg_t r = s.Sreglsw; 3970 for (int i = 0; i < 2; ++i) 3971 { 3972 if (preg == NOREG) 3973 break; 3974 assert(!(mask(preg) & assignregs)); // not already stepped on 3975 assignregs |= mask(r); 3976 3977 // MOV reg,preg 3978 if (r == preg) 3979 { 3980 } 3981 else if (mask(preg) & XMMREGS) 3982 { 3983 const op = xmmload(t.Tty); // MOVSS/D xreg,preg 3984 uint xreg = r - XMM0; 3985 cdb.gen2(op,modregxrmx(3,xreg,preg - XMM0)); 3986 } 3987 else 3988 { 3989 //printf("test1 mov %s, %s\n", regstring[r], regstring[preg]); 3990 genmovreg(cdb,r,preg); 3991 if (I64 && sz == 8) 3992 code_orrex(cdb.last(), REX_W); 3993 } 3994 preg = s.Spreg2; 3995 r = s.Sregmsw; 3996 if (t2) 3997 t = t2; 3998 } 3999 } 4000 } 4001 4002 /* For parameters that were passed on the stack, but are enregistered, 4003 * initialize the registers with the parameter stack values. 4004 * Do not use assignaddr(), as it will replace the stack reference with 4005 * the register. 
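 * The generated load is essentially
 *      MOV reg,Para.size+Soffset[EBP]
 * (MOVSS/MOVSD via xmmload() for XMM registers), rewritten to an ESP relative
 * address when there is no frame pointer.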
4006 */ 4007 for (SYMIDX si = 0; si < globsym.top; si++) 4008 { 4009 Symbol *s = globsym.tab[si]; 4010 uint sz = cast(uint)type_size(s.Stype); 4011 4012 if ((s.Sclass == SCregpar || s.Sclass == SCparameter) && 4013 s.Sfl == FLreg && 4014 (refparam 4015 // This variable has been reference by a nested function 4016 || MARS && s.Stype.Tty & mTYvolatile 4017 )) 4018 { 4019 // MOV reg,param[BP] 4020 //assert(refparam); 4021 if (mask(s.Sreglsw) & XMMREGS) 4022 { 4023 const op = xmmload(s.Stype.Tty); // MOVSS/D xreg,mem 4024 uint xreg = s.Sreglsw - XMM0; 4025 cdb.genc1(op,modregxrm(2,xreg,BPRM),FLconst,Para.size + s.Soffset); 4026 if (!hasframe) 4027 { // Convert to ESP relative address rather than EBP 4028 code *c = cdb.last(); 4029 c.Irm = cast(ubyte)modregxrm(2,xreg,4); 4030 c.Isib = modregrm(0,4,SP); 4031 c.IEV1.Vpointer += EBPtoESP; 4032 } 4033 } 4034 else 4035 { 4036 cdb.genc1(sz == 1 ? 0x8A : 0x8B, 4037 modregxrm(2,s.Sreglsw,BPRM),FLconst,Para.size + s.Soffset); 4038 code *c = cdb.last(); 4039 if (!I16 && sz == SHORTSIZE) 4040 c.Iflags |= CFopsize; // operand size 4041 if (I64 && sz >= REGSIZE) 4042 c.Irex |= REX_W; 4043 if (I64 && sz == 1 && s.Sreglsw >= 4) 4044 c.Irex |= REX; 4045 if (!hasframe) 4046 { // Convert to ESP relative address rather than EBP 4047 assert(!I16); 4048 c.Irm = cast(ubyte)modregxrm(2,s.Sreglsw,4); 4049 c.Isib = modregrm(0,4,SP); 4050 c.IEV1.Vpointer += EBPtoESP; 4051 } 4052 if (sz > REGSIZE) 4053 { 4054 cdb.genc1(0x8B, 4055 modregxrm(2,s.Sregmsw,BPRM),FLconst,Para.size + s.Soffset + REGSIZE); 4056 code *cx = cdb.last(); 4057 if (I64) 4058 cx.Irex |= REX_W; 4059 if (!hasframe) 4060 { // Convert to ESP relative address rather than EBP 4061 assert(!I16); 4062 cx.Irm = cast(ubyte)modregxrm(2,s.Sregmsw,4); 4063 cx.Isib = modregrm(0,4,SP); 4064 cx.IEV1.Vpointer += EBPtoESP; 4065 } 4066 } 4067 } 4068 } 4069 } 4070 } 4071 4072 /******************************* 4073 * Generate and return function epilog. 4074 * Output: 4075 * retsize Size of function epilog 4076 */ 4077 4078 void epilog(block *b) 4079 { 4080 code *cpopds; 4081 reg_t reg; 4082 reg_t regx; // register that's not a return reg 4083 regm_t topop,regm; 4084 targ_size_t xlocalsize = localsize; 4085 4086 CodeBuilder cdbx; cdbx.ctor(); 4087 tym_t tyf = funcsym_p.ty(); 4088 tym_t tym = tybasic(tyf); 4089 bool farfunc = tyfarfunc(tym) != 0; 4090 if (!(b.Bflags & BFLepilog)) // if no epilog code 4091 goto Lret; // just generate RET 4092 regx = (b.BC == BCret) ? AX : CX; 4093 4094 retsize = 0; 4095 4096 if (tyf & mTYnaked) // if no prolog/epilog 4097 return; 4098 4099 if (tym == TYifunc) 4100 { 4101 static immutable ubyte[5] ops2 = [ 0x07,0x1F,0x61,0xCF,0 ]; 4102 static immutable ubyte[12] ops0 = [ 0x07,0x1F,0x5F,0x5E, 4103 0x5D,0x5B,0x5B,0x5A, 4104 0x59,0x58,0xCF,0 ]; 4105 4106 genregs(cdbx,0x8B,SP,BP); // MOV SP,BP 4107 auto p = (config.target_cpu >= TARGET_80286) ? ops2.ptr : ops0.ptr; 4108 do 4109 cdbx.gen1(*p); 4110 while (*++p); 4111 goto Lopt; 4112 } 4113 4114 if (config.flags & CFGtrace && 4115 (!(config.flags4 & CFG4allcomdat) || 4116 funcsym_p.Sclass == SCcomdat || 4117 funcsym_p.Sclass == SCglobal || 4118 (config.flags2 & CFG2comdat && SymInline(funcsym_p)) 4119 ) 4120 ) 4121 { 4122 Symbol *s = getRtlsym(farfunc ? RTLSYM_TRACE_EPI_F : RTLSYM_TRACE_EPI_N); 4123 makeitextern(s); 4124 cdbx.gencs(I16 ? 
0x9A : CALL,0,FLfunc,s); // CALLF _trace 4125 if (!I16) 4126 code_orflag(cdbx.last(),CFoff | CFselfrel); 4127 useregs((ALLREGS | mBP | mES) & ~s.Sregsaved); 4128 } 4129 4130 if (usednteh & (NTEH_try | NTEH_except | NTEHcpp | EHcleanup | EHtry | NTEHpassthru) && (config.exe == EX_WIN32 || MARS)) 4131 { 4132 nteh_epilog(cdbx); 4133 } 4134 4135 cpopds = null; 4136 if (tyf & mTYloadds) 4137 { 4138 cdbx.gen1(0x1F); // POP DS 4139 cpopds = cdbx.last(); 4140 } 4141 4142 /* Pop all the general purpose registers saved on the stack 4143 * by the prolog code. Remember to do them in the reverse 4144 * order they were pushed. 4145 */ 4146 topop = fregsaved & ~mfuncreg; 4147 epilog_restoreregs(cdbx, topop); 4148 4149 version (MARS) 4150 { 4151 if (usednteh & NTEHjmonitor) 4152 { 4153 regm_t retregs = 0; 4154 if (b.BC == BCretexp) 4155 retregs = regmask(b.Belem.Ety, tym); 4156 nteh_monitor_epilog(cdbx,retregs); 4157 xlocalsize += 8; 4158 } 4159 } 4160 4161 if (config.wflags & WFwindows && farfunc) 4162 { 4163 int wflags = config.wflags; 4164 if (wflags & WFreduced && !(tyf & mTYexport)) 4165 { // reduced prolog/epilog for non-exported functions 4166 wflags &= ~(WFdgroup | WFds | WFss); 4167 if (!(wflags & WFsaveds)) 4168 goto L4; 4169 } 4170 4171 if (localsize) 4172 { 4173 cdbx.genc1(LEA,modregrm(1,SP,6),FLconst,cast(targ_uns)-2); /* LEA SP,-2[BP] */ 4174 } 4175 if (wflags & (WFsaveds | WFds | WFss | WFdgroup)) 4176 { 4177 if (cpopds) 4178 cpopds.Iop = NOP; // don't need previous one 4179 cdbx.gen1(0x1F); // POP DS 4180 } 4181 cdbx.gen1(0x58 + BP); // POP BP 4182 if (config.wflags & WFincbp) 4183 cdbx.gen1(0x48 + BP); // DEC BP 4184 assert(hasframe); 4185 } 4186 else 4187 { 4188 if (needframe || (xlocalsize && hasframe)) 4189 { 4190 L4: 4191 assert(hasframe); 4192 if (xlocalsize || enforcealign) 4193 { 4194 if (config.flags2 & CFG2stomp) 4195 { /* MOV ECX,0xBEAF 4196 * L1: 4197 * MOV [ESP],ECX 4198 * ADD ESP,4 4199 * CMP EBP,ESP 4200 * JNE L1 4201 * POP EBP 4202 */ 4203 /* Value should be: 4204 * 1. != 0 (code checks for null pointers) 4205 * 2. be odd (to mess up alignment) 4206 * 3. fall in first 64K (likely marked as inaccessible) 4207 * 4. be a value that stands out in the debugger 4208 */ 4209 assert(I32 || I64); 4210 targ_size_t value = 0x0000BEAF; 4211 reg_t regcx = CX; 4212 mfuncreg &= ~mask(regcx); 4213 uint grex = I64 ? 
REX_W << 16 : 0; 4214 cdbx.genc2(0xC7,grex | modregrmx(3,0,regcx),value); // MOV regcx,value 4215 cdbx.gen2sib(0x89,grex | modregrm(0,regcx,4),modregrm(0,4,SP)); // MOV [ESP],regcx 4216 code *c1 = cdbx.last(); 4217 cdbx.genc2(0x81,grex | modregrm(3,0,SP),REGSIZE); // ADD ESP,REGSIZE 4218 genregs(cdbx,0x39,SP,BP); // CMP EBP,ESP 4219 if (I64) 4220 code_orrex(cdbx.last(),REX_W); 4221 genjmp(cdbx,JNE,FLcode,cast(block *)c1); // JNE L1 4222 // explicitly mark as short jump, needed for correct retsize calculation (Bugzilla 15779) 4223 cdbx.last().Iflags &= ~CFjmp16; 4224 cdbx.gen1(0x58 + BP); // POP BP 4225 } 4226 else if (config.exe == EX_WIN64) 4227 { // See http://msdn.microsoft.com/en-us/library/tawsa7cb(v=vs.80).aspx 4228 // LEA RSP,0[RBP] 4229 cdbx.genc1(LEA,(REX_W<<16)|modregrm(2,SP,BPRM),FLconst,0); 4230 cdbx.gen1(0x58 + BP); // POP RBP 4231 } 4232 else if (config.target_cpu >= TARGET_80286 && 4233 !(config.target_cpu >= TARGET_80386 && config.flags4 & CFG4speed) 4234 ) 4235 cdbx.gen1(0xC9); // LEAVE 4236 else if (0 && xlocalsize == REGSIZE && Alloca.size == 0 && I32) 4237 { // This doesn't work - I should figure out why 4238 mfuncreg &= ~mask(regx); 4239 cdbx.gen1(0x58 + regx); // POP regx 4240 cdbx.gen1(0x58 + BP); // POP BP 4241 } 4242 else 4243 { 4244 genregs(cdbx,0x8B,SP,BP); // MOV SP,BP 4245 if (I64) 4246 code_orrex(cdbx.last(), REX_W); // MOV RSP,RBP 4247 cdbx.gen1(0x58 + BP); // POP BP 4248 } 4249 } 4250 else 4251 cdbx.gen1(0x58 + BP); // POP BP 4252 if (config.wflags & WFincbp && farfunc) 4253 cdbx.gen1(0x48 + BP); // DEC BP 4254 } 4255 else if (xlocalsize == REGSIZE && (!I16 || b.BC == BCret)) 4256 { 4257 mfuncreg &= ~mask(regx); 4258 cdbx.gen1(0x58 + regx); // POP regx 4259 } 4260 else if (xlocalsize) 4261 cod3_stackadj(cdbx, cast(int)-xlocalsize); 4262 } 4263 if (b.BC == BCret || b.BC == BCretexp) 4264 { 4265 Lret: 4266 opcode_t op = tyfarfunc(tym) ? 0xCA : 0xC2; 4267 if (tym == TYhfunc) 4268 { 4269 cdbx.genc2(0xC2,0,4); // RET 4 4270 } 4271 else if (!typfunc(tym) || // if caller cleans the stack 4272 config.exe == EX_WIN64 || 4273 Para.offset == 0) // or nothing pushed on the stack anyway 4274 { 4275 op++; // to a regular RET 4276 cdbx.gen1(op); 4277 } 4278 else 4279 { // Stack is always aligned on register size boundary 4280 Para.offset = (Para.offset + (REGSIZE - 1)) & ~(REGSIZE - 1); 4281 if (Para.offset >= 0x10000) 4282 { 4283 /* 4284 POP REG 4285 ADD ESP, Para.offset 4286 JMP REG 4287 */ 4288 cdbx.gen1(0x58+regx); 4289 cdbx.genc2(0x81, modregrm(3,0,SP), Para.offset); 4290 if (I64) 4291 code_orrex(cdbx.last(), REX_W); 4292 cdbx.genc2(0xFF, modregrm(3,4,regx), 0); 4293 if (I64) 4294 code_orrex(cdbx.last(), REX_W); 4295 } 4296 else 4297 cdbx.genc2(op,0,Para.offset); // RET Para.offset 4298 } 4299 } 4300 4301 Lopt: 4302 // If last instruction in ce is ADD SP,imm, and first instruction 4303 // in c sets SP, we can dump the ADD. 
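    // Illustrative only (hypothetical immediate): if b.Bcode ends with
    //      ADD SP,8
    // and the epilog below starts with LEAVE, MOV SP,BP, or LEA SP,-imm[BP],
    // the ADD can simply become a NOP; if the epilog starts with POP BP,
    // the ADD is rewritten as MOV SP,BP so SP is still restored correctly.
    // (This peephole is skipped for I64.)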
4304 CodeBuilder cdb; cdb.ctor(); 4305 cdb.append(b.Bcode); 4306 code *cr = cdb.last(); 4307 code *c = cdbx.peek(); 4308 if (cr && c && !I64) 4309 { 4310 if (cr.Iop == 0x81 && cr.Irm == modregrm(3,0,SP)) // if ADD SP,imm 4311 { 4312 if ( 4313 c.Iop == 0xC9 || // LEAVE 4314 (c.Iop == 0x8B && c.Irm == modregrm(3,SP,BP)) || // MOV SP,BP 4315 (c.Iop == LEA && c.Irm == modregrm(1,SP,6)) // LEA SP,-imm[BP] 4316 ) 4317 cr.Iop = NOP; 4318 else if (c.Iop == 0x58 + BP) // if POP BP 4319 { 4320 cr.Iop = 0x8B; 4321 cr.Irm = modregrm(3,SP,BP); // MOV SP,BP 4322 } 4323 } 4324 else 4325 { 4326 static if (0) 4327 { 4328 // These optimizations don't work if the called function 4329 // cleans off the stack. 4330 if (c.Iop == 0xC3 && cr.Iop == CALL) // CALL near 4331 { 4332 cr.Iop = 0xE9; // JMP near 4333 c.Iop = NOP; 4334 } 4335 else if (c.Iop == 0xCB && cr.Iop == 0x9A) // CALL far 4336 { 4337 cr.Iop = 0xEA; // JMP far 4338 c.Iop = NOP; 4339 } 4340 } 4341 } 4342 } 4343 4344 pinholeopt(c, null); 4345 retsize += calcblksize(c); // compute size of function epilog 4346 cdb.append(cdbx); 4347 b.Bcode = cdb.finish(); 4348 } 4349 4350 /******************************* 4351 * Return offset of SP from BP. 4352 */ 4353 4354 targ_size_t cod3_spoff() 4355 { 4356 //printf("spoff = x%x, localsize = x%x\n", (int)spoff, (int)localsize); 4357 return spoff + localsize; 4358 } 4359 4360 void gen_spill_reg(ref CodeBuilder cdb, Symbol* s, bool toreg) 4361 { 4362 code cs; 4363 const regm_t keepmsk = toreg ? RMload : RMstore; 4364 4365 elem* e = el_var(s); // so we can trick getlvalue() into working for us 4366 4367 if (mask(s.Sreglsw) & XMMREGS) 4368 { // Convert to save/restore of XMM register 4369 if (toreg) 4370 cs.Iop = xmmload(s.Stype.Tty); // MOVSS/D xreg,mem 4371 else 4372 cs.Iop = xmmstore(s.Stype.Tty); // MOVSS/D mem,xreg 4373 getlvalue(cdb,&cs,e,keepmsk); 4374 cs.orReg(s.Sreglsw - XMM0); 4375 cdb.gen(&cs); 4376 } 4377 else 4378 { 4379 const int sz = cast(int)type_size(s.Stype); 4380 cs.Iop = toreg ? 0x8B : 0x89; // MOV reg,mem[ESP] : MOV mem[ESP],reg 4381 cs.Iop ^= (sz == 1); 4382 getlvalue(cdb,&cs,e,keepmsk); 4383 cs.orReg(s.Sreglsw); 4384 if (I64 && sz == 1 && s.Sreglsw >= 4) 4385 cs.Irex |= REX; 4386 if ((cs.Irm & 0xC0) == 0xC0 && // reg,reg 4387 (((cs.Irm >> 3) ^ cs.Irm) & 7) == 0 && // registers match 4388 (((cs.Irex >> 2) ^ cs.Irex) & 1) == 0) // REX_R and REX_B match 4389 { } // skip MOV reg,reg 4390 else 4391 cdb.gen(&cs); 4392 if (sz > REGSIZE) 4393 { 4394 cs.setReg(s.Sregmsw); 4395 getlvalue_msw(&cs); 4396 if ((cs.Irm & 0xC0) == 0xC0 && // reg,reg 4397 (((cs.Irm >> 3) ^ cs.Irm) & 7) == 0 && // registers match 4398 (((cs.Irex >> 2) ^ cs.Irex) & 1) == 0) // REX_R and REX_B match 4399 { } // skip MOV reg,reg 4400 else 4401 cdb.gen(&cs); 4402 } 4403 } 4404 4405 el_free(e); 4406 } 4407 4408 /**************************** 4409 * Generate code for, and output a thunk. 4410 * Params: 4411 * sthunk = Symbol of thunk 4412 * sfunc = Symbol of thunk's target function 4413 * thisty = Type of this pointer 4414 * p = ESP parameter offset to this pointer 4415 * d = offset to add to 'this' pointer 4416 * d2 = offset from 'this' to vptr 4417 * i = offset into vtbl[] 4418 */ 4419 4420 void cod3_thunk(Symbol *sthunk,Symbol *sfunc,uint p,tym_t thisty, 4421 uint d,int i,uint d2) 4422 { 4423 targ_size_t thunkoffset; 4424 4425 int seg = sthunk.Sseg; 4426 cod3_align(seg); 4427 4428 // Skip over return address 4429 tym_t thunkty = tybasic(sthunk.ty()); 4430 if (tyfarfunc(thunkty)) 4431 p += I32 ? 
8 : tysize(TYfptr); // far function 4432 else 4433 p += tysize(TYnptr); 4434 4435 CodeBuilder cdb; cdb.ctor(); 4436 if (!I16) 4437 { 4438 /* 4439 Generate: 4440 ADD p[ESP],d 4441 For direct call: 4442 JMP sfunc 4443 For virtual call: 4444 MOV EAX, p[ESP] EAX = this 4445 MOV EAX, d2[EAX] EAX = this.vptr 4446 JMP i[EAX] jump to virtual function 4447 */ 4448 reg_t reg = 0; 4449 if (cast(int)d < 0) 4450 { 4451 d = -d; 4452 reg = 5; // switch from ADD to SUB 4453 } 4454 if (thunkty == TYmfunc) 4455 { // ADD ECX,d 4456 if (d) 4457 cdb.genc2(0x81,modregrm(3,reg,CX),d); 4458 } 4459 else if (thunkty == TYjfunc || (I64 && thunkty == TYnfunc)) 4460 { // ADD EAX,d 4461 int rm = AX; 4462 if (config.exe == EX_WIN64) 4463 rm = CX; 4464 else if (I64) 4465 rm = DI; 4466 if (d) 4467 cdb.genc2(0x81,modregrm(3,reg,rm),d); 4468 } 4469 else 4470 { 4471 cdb.genc(0x81,modregrm(2,reg,4), 4472 FLconst,p, // to this 4473 FLconst,d); // ADD p[ESP],d 4474 cdb.last().Isib = modregrm(0,4,SP); 4475 } 4476 if (I64 && cdb.peek()) 4477 cdb.last().Irex |= REX_W; 4478 } 4479 else 4480 { 4481 /* 4482 Generate: 4483 MOV BX,SP 4484 ADD [SS:] p[BX],d 4485 For direct call: 4486 JMP sfunc 4487 For virtual call: 4488 MOV BX, p[BX] BX = this 4489 MOV BX, d2[BX] BX = this.vptr 4490 JMP i[BX] jump to virtual function 4491 */ 4492 4493 genregs(cdb,0x89,SP,BX); // MOV BX,SP 4494 cdb.genc(0x81,modregrm(2,0,7), 4495 FLconst,p, // to this 4496 FLconst,d); // ADD p[BX],d 4497 if (config.wflags & WFssneds || 4498 // If DS needs reloading from SS, 4499 // then assume SS != DS on thunk entry 4500 (LARGEDATA && config.wflags & WFss)) 4501 cdb.last().Iflags |= CFss; // SS: 4502 } 4503 4504 if ((i & 0xFFFF) != 0xFFFF) // if virtual call 4505 { 4506 const bool FARTHIS = (tysize(thisty) > REGSIZE); 4507 const bool FARVPTR = FARTHIS; 4508 4509 assert(thisty != TYvptr); // can't handle this case 4510 4511 if (!I16) 4512 { 4513 assert(!FARTHIS && !LARGECODE); 4514 if (thunkty == TYmfunc) // if 'this' is in ECX 4515 { 4516 // MOV EAX,d2[ECX] 4517 cdb.genc1(0x8B,modregrm(2,AX,CX),FLconst,d2); 4518 } 4519 else if (thunkty == TYjfunc) // if 'this' is in EAX 4520 { 4521 // MOV EAX,d2[EAX] 4522 cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2); 4523 } 4524 else 4525 { 4526 // MOV EAX,p[ESP] 4527 cdb.genc1(0x8B,(modregrm(0,4,SP) << 8) | modregrm(2,AX,4),FLconst,cast(targ_uns) p); 4528 if (I64) 4529 cdb.last().Irex |= REX_W; 4530 4531 // MOV EAX,d2[EAX] 4532 cdb.genc1(0x8B,modregrm(2,AX,AX),FLconst,d2); 4533 } 4534 if (I64) 4535 code_orrex(cdb.last(), REX_W); 4536 // JMP i[EAX] 4537 cdb.genc1(0xFF,modregrm(2,4,0),FLconst,cast(targ_uns) i); 4538 } 4539 else 4540 { 4541 // MOV/LES BX,[SS:] p[BX] 4542 cdb.genc1((FARTHIS ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,cast(targ_uns) p); 4543 if (config.wflags & WFssneds || 4544 // If DS needs reloading from SS, 4545 // then assume SS != DS on thunk entry 4546 (LARGEDATA && config.wflags & WFss)) 4547 cdb.last().Iflags |= CFss; // SS: 4548 4549 // MOV/LES BX,[ES:]d2[BX] 4550 cdb.genc1((FARVPTR ? 0xC4 : 0x8B),modregrm(2,BX,7),FLconst,d2); 4551 if (FARTHIS) 4552 cdb.last().Iflags |= CFes; // ES: 4553 4554 // JMP i[BX] 4555 cdb.genc1(0xFF,modregrm(2,(LARGECODE ? 5 : 4),7),FLconst,cast(targ_uns) i); 4556 if (FARVPTR) 4557 cdb.last().Iflags |= CFes; // ES: 4558 } 4559 } 4560 else 4561 { 4562 static if (0) 4563 { 4564 localgot = null; // no local variables 4565 code *c1 = load_localgot(); 4566 if (c1) 4567 { 4568 assignaddrc(c1); 4569 cdb.append(c1); 4570 } 4571 } 4572 cdb.gencs((LARGECODE ? 
0xEA : 0xE9),0,FLfunc,sfunc); // JMP sfunc
4573 cdb.last().Iflags |= LARGECODE ? (CFseg | CFoff) : (CFselfrel | CFoff);
4574 }
4575
4576 thunkoffset = Offset(seg);
4577 code *c = cdb.finish();
4578 pinholeopt(c,null);
4579 codout(seg,c);
4580 code_free(c);
4581
4582 sthunk.Soffset = thunkoffset;
4583 sthunk.Ssize = Offset(seg) - thunkoffset; // size of thunk
4584 sthunk.Sseg = seg;
4585 static if (TARGET_LINUX || TARGET_OSX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS)
4586 {
4587 objmod.pubdef(seg,sthunk,sthunk.Soffset);
4588 }
4589 static if (TARGET_WINDOS)
4590 {
4591 if (config.objfmt == OBJ_MSCOFF)
4592 objmod.pubdef(seg,sthunk,sthunk.Soffset);
4593 }
4594 searchfixlist(sthunk); // resolve forward refs
4595 }
4596
4597 /*****************************
4598 * Assume symbol s is extern.
4599 */
4600
4601 void makeitextern(Symbol *s)
4602 {
4603 if (s.Sxtrnnum == 0)
4604 {
4605 s.Sclass = SCextern; /* external */
4606 /*printf("makeitextern(x%x)\n",s);*/
4607 objmod.external(s);
4608 }
4609 }
4610
4611
4612 /*******************************
4613 * Replace JMPs in Bgotocode with JMP SHORTs wherever possible.
4614 * This routine depends on FLcode jumps being only forward
4615 * referenced.
4616 * BFLjmpoptdone is set to true if nothing more can be done
4617 * with this block.
4618 * Input:
4619 * flag !=0 means don't have correct Boffsets yet
4620 * Returns:
4621 * number of bytes saved
4622 */
4623
4624 int branch(block *bl,int flag)
4625 {
4626 int bytesaved;
4627 code* c,cn,ct;
4628 targ_size_t offset,disp;
4629 targ_size_t csize;
4630
4631 if (!flag)
4632 bl.Bflags |= BFLjmpoptdone; // assume this will be all
4633 c = bl.Bcode;
4634 if (!c)
4635 return 0;
4636 bytesaved = 0;
4637 offset = bl.Boffset; /* offset of start of block */
4638 while (1)
4639 {
4640 ubyte op;
4641
4642 csize = calccodsize(c);
4643 cn = code_next(c);
4644 op = cast(ubyte)c.Iop;
4645 if ((op & ~0x0F) == 0x70 && c.Iflags & CFjmp16 ||
4646 (op == JMP && !(c.Iflags & CFjmp5)))
4647 {
4648 L1:
4649 switch (c.IFL2)
4650 {
4651 case FLblock:
4652 if (flag) // no offsets yet, don't optimize
4653 goto L3;
4654 disp = c.IEV2.Vblock.Boffset - offset - csize;
4655
4656 /* If this is a forward branch, and there is an aligned
4657 * block intervening, it is possible that shrinking
4658 * the jump instruction will cause it to be out of
4659 * range of the target. This happens if the alignment
4660 * prevents the target block from moving correspondingly
4661 * closer.
4662 */
4663 if (disp >= 0x7F-4 && c.IEV2.Vblock.Boffset > offset)
4664 { /* Look for intervening alignment
4665 */
4666 for (block *b = bl.Bnext; b; b = b.Bnext)
4667 {
4668 if (b.Balign)
4669 {
4670 bl.Bflags &= ~BFLjmpoptdone; // some JMPs left
4671 goto L3;
4672 }
4673 if (b == c.IEV2.Vblock)
4674 break;
4675 }
4676 }
4677
4678 break;
4679
4680 case FLcode:
4681 {
4682 code *cr;
4683
4684 disp = 0;
4685
4686 ct = c.IEV2.Vcode; /* target of branch */
4687 assert(ct.Iflags & (CFtarg | CFtarg2));
4688 for (cr = cn; cr; cr = code_next(cr))
4689 {
4690 if (cr == ct)
4691 break;
4692 disp += calccodsize(cr);
4693 }
4694
4695 if (!cr)
4696 { // Didn't find it in forward search.
Try backwards jump 4697 int s = 0; 4698 disp = 0; 4699 for (cr = bl.Bcode; cr != cn; cr = code_next(cr)) 4700 { 4701 assert(cr != null); // must have found it 4702 if (cr == ct) 4703 s = 1; 4704 if (s) 4705 disp += calccodsize(cr); 4706 } 4707 } 4708 4709 if (config.flags4 & CFG4optimized && !flag) 4710 { 4711 /* Propagate branch forward past junk */ 4712 while (1) 4713 { 4714 if (ct.Iop == NOP || 4715 ct.Iop == (ESCAPE | ESClinnum)) 4716 { 4717 ct = code_next(ct); 4718 if (!ct) 4719 goto L2; 4720 } 4721 else 4722 { 4723 c.IEV2.Vcode = ct; 4724 ct.Iflags |= CFtarg; 4725 break; 4726 } 4727 } 4728 4729 /* And eliminate jmps to jmps */ 4730 if ((op == ct.Iop || ct.Iop == JMP) && 4731 (op == JMP || c.Iflags & CFjmp16)) 4732 { 4733 c.IFL2 = ct.IFL2; 4734 c.IEV2.Vcode = ct.IEV2.Vcode; 4735 /*printf("eliminating branch\n");*/ 4736 goto L1; 4737 } 4738 L2: 4739 { } 4740 } 4741 } 4742 break; 4743 4744 default: 4745 goto L3; 4746 } 4747 4748 if (disp == 0) // bra to next instruction 4749 { 4750 bytesaved += csize; 4751 c.Iop = NOP; // del branch instruction 4752 c.IEV2.Vcode = null; 4753 c = cn; 4754 if (!c) 4755 break; 4756 continue; 4757 } 4758 else if (cast(targ_size_t)cast(targ_schar)(disp - 2) == (disp - 2) && 4759 cast(targ_size_t)cast(targ_schar)disp == disp) 4760 { 4761 if (op == JMP) 4762 { 4763 c.Iop = JMPS; // JMP SHORT 4764 bytesaved += I16 ? 1 : 3; 4765 } 4766 else // else Jcond 4767 { 4768 c.Iflags &= ~CFjmp16; // a branch is ok 4769 bytesaved += I16 ? 3 : 4; 4770 4771 // Replace a cond jump around a call to a function that 4772 // never returns with a cond jump to that function. 4773 if (config.flags4 & CFG4optimized && 4774 config.target_cpu >= TARGET_80386 && 4775 disp == (I16 ? 3 : 5) && 4776 cn && 4777 cn.Iop == CALL && 4778 cn.IFL2 == FLfunc && 4779 cn.IEV2.Vsym.Sflags & SFLexit && 4780 !(cn.Iflags & (CFtarg | CFtarg2)) 4781 ) 4782 { 4783 cn.Iop = 0x0F00 | ((c.Iop & 0x0F) ^ 0x81); 4784 c.Iop = NOP; 4785 c.IEV2.Vcode = null; 4786 bytesaved++; 4787 4788 // If nobody else points to ct, we can remove the CFtarg 4789 if (flag && ct) 4790 { 4791 code *cx; 4792 for (cx = bl.Bcode; 1; cx = code_next(cx)) 4793 { 4794 if (!cx) 4795 { 4796 ct.Iflags &= ~CFtarg; 4797 break; 4798 } 4799 if (cx.IEV2.Vcode == ct) 4800 break; 4801 } 4802 } 4803 } 4804 } 4805 csize = calccodsize(c); 4806 } 4807 else 4808 bl.Bflags &= ~BFLjmpoptdone; // some JMPs left 4809 } 4810 L3: 4811 if (cn) 4812 { 4813 offset += csize; 4814 c = cn; 4815 } 4816 else 4817 break; 4818 } 4819 //printf("bytesaved = x%x\n",bytesaved); 4820 return bytesaved; 4821 } 4822 4823 4824 /************************************************ 4825 * Adjust all Soffset's of stack variables so they 4826 * are all relative to the frame pointer. 
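 * A sketch with made-up numbers: a SCparameter at Soffset 8 with
 * Para.size == 0x20 ends up at Soffset 0x28 (i.e. 0x28[EBP]), while a
 * SCauto at Soffset -8 gets Auto.size + BPoff added instead.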
4827 */ 4828 4829 version (MARS) 4830 { 4831 void cod3_adjSymOffsets() 4832 { 4833 SYMIDX si; 4834 4835 //printf("cod3_adjSymOffsets()\n"); 4836 for (si = 0; si < globsym.top; si++) 4837 { 4838 //printf("\tglobsym.tab[%d] = %p\n",si,globsym.tab[si]); 4839 Symbol *s = globsym.tab[si]; 4840 4841 switch (s.Sclass) 4842 { 4843 case SCparameter: 4844 case SCregpar: 4845 case SCshadowreg: 4846 //printf("s = '%s', Soffset = x%x, Para.size = x%x, EBPtoESP = x%x\n", s.Sident, s.Soffset, Para.size, EBPtoESP); 4847 s.Soffset += Para.size; 4848 if (0 && !(funcsym_p.Sfunc.Fflags3 & Fmember)) 4849 { 4850 if (!hasframe) 4851 s.Soffset += EBPtoESP; 4852 if (funcsym_p.Sfunc.Fflags3 & Fnested) 4853 s.Soffset += REGSIZE; 4854 } 4855 break; 4856 4857 case SCfastpar: 4858 //printf("\tfastpar %s %p Soffset %x Fast.size %x BPoff %x\n", s.Sident, s, (int)s.Soffset, (int)Fast.size, (int)BPoff); 4859 s.Soffset += Fast.size + BPoff; 4860 break; 4861 4862 case SCauto: 4863 case SCregister: 4864 if (s.Sfl == FLfast) 4865 s.Soffset += Fast.size + BPoff; 4866 else 4867 //printf("s = '%s', Soffset = x%x, Auto.size = x%x, BPoff = x%x EBPtoESP = x%x\n", s.Sident, (int)s.Soffset, (int)Auto.size, (int)BPoff, (int)EBPtoESP); 4868 // if (!(funcsym_p.Sfunc.Fflags3 & Fnested)) 4869 s.Soffset += Auto.size + BPoff; 4870 break; 4871 4872 case SCbprel: 4873 break; 4874 4875 default: 4876 continue; 4877 } 4878 static if (0) 4879 { 4880 if (!hasframe) 4881 s.Soffset += EBPtoESP; 4882 } 4883 } 4884 } 4885 4886 } 4887 4888 /******************************* 4889 * Take symbol info in union ev and replace it with a real address 4890 * in Vpointer. 4891 */ 4892 4893 void assignaddr(block *bl) 4894 { 4895 int EBPtoESPsave = EBPtoESP; 4896 int hasframesave = hasframe; 4897 4898 if (bl.Bflags & BFLoutsideprolog) 4899 { 4900 EBPtoESP = -REGSIZE; 4901 hasframe = 0; 4902 } 4903 assignaddrc(bl.Bcode); 4904 hasframe = hasframesave; 4905 EBPtoESP = EBPtoESPsave; 4906 } 4907 4908 void assignaddrc(code *c) 4909 { 4910 int sn; 4911 Symbol *s; 4912 ubyte ins,rm; 4913 targ_size_t soff; 4914 targ_size_t base; 4915 4916 base = EBPtoESP; 4917 for (; c; c = code_next(c)) 4918 { 4919 debug 4920 { 4921 if (0) 4922 { printf("assignaddrc()\n"); 4923 code_print(c); 4924 } 4925 if (code_next(c) && code_next(code_next(c)) == c) 4926 assert(0); 4927 } 4928 4929 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 4930 ins = vex_inssize(c); 4931 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 4932 ins = inssize2[(c.Iop >> 8) & 0xFF]; 4933 else if ((c.Iop & 0xFF00) == 0x0F00) 4934 ins = inssize2[c.Iop & 0xFF]; 4935 else if ((c.Iop & 0xFF) == ESCAPE) 4936 { 4937 if (c.Iop == (ESCAPE | ESCadjesp)) 4938 { 4939 //printf("adjusting EBPtoESP (%d) by %ld\n",EBPtoESP,(long)c.IEV1.Vint); 4940 EBPtoESP += c.IEV1.Vint; 4941 c.Iop = NOP; 4942 } 4943 else if (c.Iop == (ESCAPE | ESCfixesp)) 4944 { 4945 //printf("fix ESP\n"); 4946 if (hasframe) 4947 { 4948 // LEA ESP,-EBPtoESP[EBP] 4949 c.Iop = LEA; 4950 if (c.Irm & 8) 4951 c.Irex |= REX_R; 4952 c.Irm = modregrm(2,SP,BP); 4953 c.Iflags = CFoff; 4954 c.IFL1 = FLconst; 4955 c.IEV1.Vuns = -EBPtoESP; 4956 if (enforcealign) 4957 { 4958 // AND ESP, -STACKALIGN 4959 code *cn = code_calloc(); 4960 cn.Iop = 0x81; 4961 cn.Irm = modregrm(3, 4, SP); 4962 cn.Iflags = CFoff; 4963 cn.IFL2 = FLconst; 4964 cn.IEV2.Vsize_t = -STACKALIGN; 4965 if (I64) 4966 c.Irex |= REX_W; 4967 cn.next = c.next; 4968 c.next = cn; 4969 } 4970 } 4971 } 4972 else if (c.Iop == (ESCAPE | ESCframeptr)) 4973 { // Convert to load of frame pointer 4974 // c.Irm is the register to use 4975 if 
(hasframe && !enforcealign) 4976 { // MOV reg,EBP 4977 c.Iop = 0x89; 4978 if (c.Irm & 8) 4979 c.Irex |= REX_B; 4980 c.Irm = modregrm(3,BP,c.Irm & 7); 4981 } 4982 else 4983 { // LEA reg,EBPtoESP[ESP] 4984 c.Iop = LEA; 4985 if (c.Irm & 8) 4986 c.Irex |= REX_R; 4987 c.Irm = modregrm(2,c.Irm & 7,4); 4988 c.Isib = modregrm(0,4,SP); 4989 c.Iflags = CFoff; 4990 c.IFL1 = FLconst; 4991 c.IEV1.Vuns = EBPtoESP; 4992 } 4993 } 4994 if (I64) 4995 c.Irex |= REX_W; 4996 continue; 4997 } 4998 else 4999 ins = inssize[c.Iop & 0xFF]; 5000 if (!(ins & M) || 5001 ((rm = c.Irm) & 0xC0) == 0xC0) 5002 goto do2; /* if no first operand */ 5003 if (is32bitaddr(I32,c.Iflags)) 5004 { 5005 5006 if ( 5007 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5)) 5008 ) 5009 goto do2; /* if no first operand */ 5010 } 5011 else 5012 { 5013 if ( 5014 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 5015 ) 5016 goto do2; /* if no first operand */ 5017 } 5018 s = c.IEV1.Vsym; 5019 switch (c.IFL1) 5020 { 5021 case FLdata: 5022 if (config.objfmt == OBJ_OMF && s.Sclass != SCcomdat) 5023 { 5024 version (MARS) 5025 { 5026 c.IEV1.Vseg = s.Sseg; 5027 } 5028 else 5029 { 5030 c.IEV1.Vseg = DATA; 5031 } 5032 c.IEV1.Vpointer += s.Soffset; 5033 c.IFL1 = FLdatseg; 5034 } 5035 else 5036 c.IFL1 = FLextern; 5037 goto do2; 5038 5039 case FLudata: 5040 if (config.objfmt == OBJ_OMF) 5041 { 5042 version (MARS) 5043 { 5044 c.IEV1.Vseg = s.Sseg; 5045 } 5046 else 5047 { 5048 c.IEV1.Vseg = UDATA; 5049 } 5050 c.IEV1.Vpointer += s.Soffset; 5051 c.IFL1 = FLdatseg; 5052 } 5053 else 5054 c.IFL1 = FLextern; 5055 goto do2; 5056 5057 case FLtlsdata: 5058 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5059 c.IFL1 = FLextern; 5060 goto do2; 5061 5062 case FLdatseg: 5063 //c.IEV1.Vseg = DATA; 5064 goto do2; 5065 5066 case FLfardata: 5067 case FLcsdata: 5068 case FLpseudo: 5069 goto do2; 5070 5071 case FLstack: 5072 //printf("Soffset = %d, EBPtoESP = %d, base = %d, pointer = %d\n", 5073 //s.Soffset,EBPtoESP,base,c.IEV1.Vpointer); 5074 c.IEV1.Vpointer += s.Soffset + EBPtoESP - base - EEStack.offset; 5075 break; 5076 5077 case FLfast: 5078 soff = Fast.size; 5079 goto L1; 5080 5081 case FLreg: 5082 case FLauto: 5083 soff = Auto.size; 5084 L1: 5085 if (Symbol_Sisdead(s, anyiasm)) 5086 { 5087 c.Iop = NOP; // remove references to it 5088 continue; 5089 } 5090 if (s.Sfl == FLreg && c.IEV1.Vpointer < 2) 5091 { 5092 reg_t reg = s.Sreglsw; 5093 5094 assert(!(s.Sregm & ~mask(reg))); 5095 if (c.IEV1.Vpointer == 1) 5096 { 5097 assert(reg < 4); /* must be a BYTEREGS */ 5098 reg |= 4; /* convert to high byte reg */ 5099 } 5100 if (reg & 8) 5101 { 5102 assert(I64); 5103 c.Irex |= REX_B; 5104 reg &= 7; 5105 } 5106 c.Irm = (c.Irm & modregrm(0,7,0)) 5107 | modregrm(3,0,reg); 5108 assert(c.Iop != LES && c.Iop != LEA); 5109 goto do2; 5110 } 5111 else 5112 { c.IEV1.Vpointer += s.Soffset + soff + BPoff; 5113 if (s.Sflags & SFLunambig) 5114 c.Iflags |= CFunambig; 5115 L2: 5116 if (!hasframe || (enforcealign && c.IFL1 != FLpara)) 5117 { /* Convert to ESP relative address instead of EBP */ 5118 assert(!I16); 5119 c.IEV1.Vpointer += EBPtoESP; 5120 ubyte crm = c.Irm; 5121 if ((crm & 7) == 4) // if SIB byte 5122 { 5123 assert((c.Isib & 7) == BP); 5124 assert((crm & 0xC0) != 0); 5125 c.Isib = (c.Isib & ~7) | modregrm(0,0,SP); 5126 } 5127 else 5128 { 5129 assert((crm & 7) == 5); 5130 c.Irm = (crm & modregrm(0,7,0)) 5131 | modregrm(2,0,4); 5132 c.Isib = modregrm(0,4,SP); 5133 } 5134 } 5135 } 5136 break; 5137 5138 case FLpara: 5139 soff = Para.size - BPoff; // cancel out 
add of BPoff 5140 goto L1; 5141 5142 case FLfltreg: 5143 c.IEV1.Vpointer += Foff + BPoff; 5144 c.Iflags |= CFunambig; 5145 goto L2; 5146 5147 case FLallocatmp: 5148 c.IEV1.Vpointer += Alloca.offset + BPoff; 5149 goto L2; 5150 5151 case FLfuncarg: 5152 c.IEV1.Vpointer += cgstate.funcarg.offset + BPoff; 5153 goto L2; 5154 5155 case FLbprel: 5156 c.IEV1.Vpointer += s.Soffset; 5157 break; 5158 5159 case FLcs: 5160 sn = c.IEV1.Vuns; 5161 if (!CSE.loaded(sn)) // if never loaded 5162 { 5163 c.Iop = NOP; 5164 continue; 5165 } 5166 c.IEV1.Vpointer = CSE.offset(sn) + CSoff + BPoff; 5167 c.Iflags |= CFunambig; 5168 goto L2; 5169 5170 case FLregsave: 5171 sn = c.IEV1.Vuns; 5172 c.IEV1.Vpointer = sn + regsave.off + BPoff; 5173 c.Iflags |= CFunambig; 5174 goto L2; 5175 5176 case FLndp: 5177 version (MARS) 5178 { 5179 assert(c.IEV1.Vuns < global87.save.length); 5180 } 5181 c.IEV1.Vpointer = c.IEV1.Vuns * tysize(TYldouble) + NDPoff + BPoff; 5182 c.Iflags |= CFunambig; 5183 goto L2; 5184 5185 case FLoffset: 5186 break; 5187 5188 case FLlocalsize: 5189 c.IEV1.Vpointer += localsize; 5190 break; 5191 5192 case FLconst: 5193 default: 5194 goto do2; 5195 } 5196 c.IFL1 = FLconst; 5197 do2: 5198 /* Ignore TEST (F6 and F7) opcodes */ 5199 if (!(ins & T)) goto done; /* if no second operand */ 5200 s = c.IEV2.Vsym; 5201 switch (c.IFL2) 5202 { 5203 case FLdata: 5204 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5205 { 5206 c.IFL2 = FLextern; 5207 goto do2; 5208 } 5209 else 5210 { 5211 if (s.Sclass == SCcomdat) 5212 { c.IFL2 = FLextern; 5213 goto do2; 5214 } 5215 c.IEV2.Vseg = MARS ? s.Sseg : DATA; 5216 c.IEV2.Vpointer += s.Soffset; 5217 c.IFL2 = FLdatseg; 5218 goto done; 5219 } 5220 5221 case FLudata: 5222 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5223 { 5224 c.IFL2 = FLextern; 5225 goto do2; 5226 } 5227 else 5228 { 5229 c.IEV2.Vseg = MARS ? 
s.Sseg : UDATA; 5230 c.IEV2.Vpointer += s.Soffset; 5231 c.IFL2 = FLdatseg; 5232 goto done; 5233 } 5234 5235 case FLtlsdata: 5236 if (config.objfmt == OBJ_ELF || config.objfmt == OBJ_MACH) 5237 { 5238 c.IFL2 = FLextern; 5239 goto do2; 5240 } 5241 goto done; 5242 5243 case FLdatseg: 5244 //c.IEV2.Vseg = DATA; 5245 goto done; 5246 5247 case FLcsdata: 5248 case FLfardata: 5249 goto done; 5250 5251 case FLreg: 5252 case FLpseudo: 5253 assert(0); 5254 /* NOTREACHED */ 5255 5256 case FLfast: 5257 c.IEV2.Vpointer += s.Soffset + Fast.size + BPoff; 5258 break; 5259 5260 case FLauto: 5261 c.IEV2.Vpointer += s.Soffset + Auto.size + BPoff; 5262 L3: 5263 if (!hasframe || (enforcealign && c.IFL2 != FLpara)) 5264 /* Convert to ESP relative address instead of EBP */ 5265 c.IEV2.Vpointer += EBPtoESP; 5266 break; 5267 5268 case FLpara: 5269 c.IEV2.Vpointer += s.Soffset + Para.size; 5270 goto L3; 5271 5272 case FLfltreg: 5273 c.IEV2.Vpointer += Foff + BPoff; 5274 goto L3; 5275 5276 case FLallocatmp: 5277 c.IEV2.Vpointer += Alloca.offset + BPoff; 5278 goto L3; 5279 5280 case FLfuncarg: 5281 c.IEV2.Vpointer += cgstate.funcarg.offset + BPoff; 5282 goto L3; 5283 5284 case FLbprel: 5285 c.IEV2.Vpointer += s.Soffset; 5286 break; 5287 5288 case FLstack: 5289 c.IEV2.Vpointer += s.Soffset + EBPtoESP - base; 5290 break; 5291 5292 case FLcs: 5293 case FLndp: 5294 case FLregsave: 5295 assert(0); 5296 5297 case FLconst: 5298 break; 5299 5300 case FLlocalsize: 5301 c.IEV2.Vpointer += localsize; 5302 break; 5303 5304 default: 5305 goto done; 5306 } 5307 c.IFL2 = FLconst; 5308 done: 5309 { } 5310 } 5311 } 5312 5313 /******************************* 5314 * Return offset from BP of symbol s. 5315 */ 5316 5317 targ_size_t cod3_bpoffset(Symbol *s) 5318 { 5319 targ_size_t offset; 5320 5321 symbol_debug(s); 5322 offset = s.Soffset; 5323 switch (s.Sfl) 5324 { 5325 case FLpara: 5326 offset += Para.size; 5327 break; 5328 5329 case FLfast: 5330 offset += Fast.size + BPoff; 5331 break; 5332 5333 case FLauto: 5334 offset += Auto.size + BPoff; 5335 break; 5336 5337 default: 5338 WRFL(cast(FL)s.Sfl); 5339 symbol_print(s); 5340 assert(0); 5341 } 5342 assert(hasframe); 5343 return offset; 5344 } 5345 5346 5347 /******************************* 5348 * Find shorter versions of the same instructions. 5349 * Does these optimizations: 5350 * replaces jmps to the next instruction with NOPs 5351 * sign extension of modregrm displacement 5352 * sign extension of immediate data (can't do it for OR, AND, XOR 5353 * as the opcodes are not defined) 5354 * short versions for AX EA 5355 * short versions for reg EA 5356 * Code is neither removed nor added. 
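 * Examples of rewrites done below (not exhaustive):
 *      CMP reg,0           =>  TEST reg,reg
 *      AND reg,0           =>  XOR reg,reg
 *      ADD/OR/SUB/XOR EA,0 =>  NOP
 *      shift EA by imm8 1  =>  short form (opcode D0/D1)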
5357 * Params: 5358 * b = block for code (or null) 5359 * c = code list to optimize 5360 */ 5361 5362 void pinholeopt(code *c,block *b) 5363 { 5364 targ_size_t a; 5365 uint mod; 5366 ubyte ins; 5367 int usespace; 5368 int useopsize; 5369 int space; 5370 block *bn; 5371 5372 debug 5373 { 5374 __gshared int tested; if (!tested) { tested++; pinholeopt_unittest(); } 5375 } 5376 5377 debug 5378 { 5379 code *cstart = c; 5380 if (debugc) 5381 { 5382 printf("+pinholeopt(%p)\n",c); 5383 } 5384 } 5385 5386 if (b) 5387 { 5388 bn = b.Bnext; 5389 usespace = (config.flags4 & CFG4space && b.BC != BCasm); 5390 useopsize = (I16 || (config.flags4 & CFG4space && b.BC != BCasm)); 5391 } 5392 else 5393 { 5394 bn = null; 5395 usespace = (config.flags4 & CFG4space); 5396 useopsize = (I16 || config.flags4 & CFG4space); 5397 } 5398 for (; c; c = code_next(c)) 5399 { 5400 L1: 5401 opcode_t op = c.Iop; 5402 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 5403 ins = vex_inssize(c); 5404 else if ((op & 0xFFFD00) == 0x0F3800) 5405 ins = inssize2[(op >> 8) & 0xFF]; 5406 else if ((op & 0xFF00) == 0x0F00) 5407 ins = inssize2[op & 0xFF]; 5408 else 5409 ins = inssize[op & 0xFF]; 5410 if (ins & M) // if modregrm byte 5411 { 5412 int shortop = (c.Iflags & CFopsize) ? !I16 : I16; 5413 int local_BPRM = BPRM; 5414 5415 if (c.Iflags & CFaddrsize) 5416 local_BPRM ^= 5 ^ 6; // toggle between 5 and 6 5417 5418 uint rm = c.Irm; 5419 reg_t reg = rm & modregrm(0,7,0); // isolate reg field 5420 reg_t ereg = rm & 7; 5421 //printf("c = %p, op = %02x rm = %02x\n", c, op, rm); 5422 5423 /* If immediate second operand */ 5424 if ((ins & T || 5425 ((op == 0xF6 || op == 0xF7) && (reg < modregrm(0,2,0) || reg > modregrm(0,3,0))) 5426 ) && 5427 c.IFL2 == FLconst) 5428 { 5429 int flags = c.Iflags & CFpsw; /* if want result in flags */ 5430 targ_long u = c.IEV2.Vuns; 5431 if (ins & E) 5432 u = cast(byte) u; 5433 else if (shortop) 5434 u = cast(short) u; 5435 5436 // Replace CMP reg,0 with TEST reg,reg 5437 if ((op & 0xFE) == 0x80 && // 80 is CMP R8,imm8; 81 is CMP reg,imm 5438 rm >= modregrm(3,7,AX) && 5439 u == 0) 5440 { 5441 c.Iop = (op & 1) | 0x84; 5442 c.Irm = modregrm(3,ereg,ereg); 5443 if (c.Irex & REX_B) 5444 c.Irex |= REX_R; 5445 goto L1; 5446 } 5447 5448 /* Optimize ANDs with an immediate constant */ 5449 if ((op == 0x81 || op == 0x80) && reg == modregrm(0,4,0)) 5450 { 5451 if (rm >= modregrm(3,4,AX)) // AND reg,imm 5452 { 5453 if (u == 0) 5454 { 5455 /* Replace with XOR reg,reg */ 5456 c.Iop = 0x30 | (op & 1); 5457 c.Irm = modregrm(3,ereg,ereg); 5458 if (c.Irex & REX_B) 5459 c.Irex |= REX_R; 5460 goto L1; 5461 } 5462 if (u == 0xFFFFFFFF && !flags) 5463 { 5464 c.Iop = NOP; 5465 goto L1; 5466 } 5467 } 5468 if (op == 0x81 && !flags) 5469 { // If we can do the operation in one byte 5470 5471 // If EA is not SI or DI 5472 if ((rm < modregrm(3,4,SP) || I64) && 5473 (config.flags4 & CFG4space || 5474 config.target_cpu < TARGET_PentiumPro) 5475 ) 5476 { 5477 if ((u & 0xFFFFFF00) == 0xFFFFFF00) 5478 goto L2; 5479 else if (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4)) 5480 { 5481 if (!shortop) 5482 { 5483 if ((u & 0xFFFF00FF) == 0xFFFF00FF) 5484 goto L3; 5485 } 5486 else 5487 { 5488 if ((u & 0xFF) == 0xFF) 5489 goto L3; 5490 } 5491 } 5492 } 5493 if (!shortop && useopsize) 5494 { 5495 if ((u & 0xFFFF0000) == 0xFFFF0000) 5496 { 5497 c.Iflags ^= CFopsize; 5498 goto L1; 5499 } 5500 if ((u & 0xFFFF) == 0xFFFF && rm < modregrm(3,4,AX)) 5501 { 5502 c.IEV1.Voffset += 2; /* address MSW */ 5503 c.IEV2.Vuns >>= 16; 5504 c.Iflags ^= CFopsize; 5505 goto L1; 5506 
} 5507 if (rm >= modregrm(3,4,AX)) 5508 { 5509 if (u == 0xFF && (rm <= modregrm(3,4,BX) || I64)) 5510 { 5511 c.Iop = 0x0FB6; // MOVZX 5512 c.Irm = modregrm(3,ereg,ereg); 5513 if (c.Irex & REX_B) 5514 c.Irex |= REX_R; 5515 goto L1; 5516 } 5517 if (u == 0xFFFF) 5518 { 5519 c.Iop = 0x0FB7; // MOVZX 5520 c.Irm = modregrm(3,ereg,ereg); 5521 if (c.Irex & REX_B) 5522 c.Irex |= REX_R; 5523 goto L1; 5524 } 5525 } 5526 } 5527 } 5528 } 5529 5530 /* Look for ADD,OR,SUB,XOR with u that we can eliminate */ 5531 if (!flags && 5532 (op == 0x81 || op == 0x80) && 5533 (reg == modregrm(0,0,0) || reg == modregrm(0,1,0) || // ADD,OR 5534 reg == modregrm(0,5,0) || reg == modregrm(0,6,0)) // SUB, XOR 5535 ) 5536 { 5537 if (u == 0) 5538 { 5539 c.Iop = NOP; 5540 goto L1; 5541 } 5542 if (u == ~0 && reg == modregrm(0,6,0)) /* XOR */ 5543 { 5544 c.Iop = 0xF6 | (op & 1); /* NOT */ 5545 c.Irm ^= modregrm(0,6^2,0); 5546 goto L1; 5547 } 5548 if (!shortop && 5549 useopsize && 5550 op == 0x81 && 5551 (u & 0xFFFF0000) == 0 && 5552 (reg == modregrm(0,6,0) || reg == modregrm(0,1,0))) 5553 { 5554 c.Iflags ^= CFopsize; 5555 goto L1; 5556 } 5557 } 5558 5559 /* Look for TEST or OR or XOR with an immediate constant */ 5560 /* that we can replace with a byte operation */ 5561 if (op == 0xF7 && reg == modregrm(0,0,0) || 5562 op == 0x81 && reg == modregrm(0,6,0) && !flags || 5563 op == 0x81 && reg == modregrm(0,1,0)) 5564 { 5565 // See if we can replace a dword with a word 5566 // (avoid for 32 bit instructions, because CFopsize 5567 // is too slow) 5568 if (!shortop && useopsize) 5569 { 5570 if ((u & 0xFFFF0000) == 0) 5571 { 5572 c.Iflags ^= CFopsize; 5573 goto L1; 5574 } 5575 /* If memory (not register) addressing mode */ 5576 if ((u & 0xFFFF) == 0 && rm < modregrm(3,0,AX)) 5577 { 5578 c.IEV1.Voffset += 2; /* address MSW */ 5579 c.IEV2.Vuns >>= 16; 5580 c.Iflags ^= CFopsize; 5581 goto L1; 5582 } 5583 } 5584 5585 // If EA is not SI or DI 5586 if (rm < (modregrm(3,0,SP) | reg) && 5587 (usespace || 5588 config.target_cpu < TARGET_PentiumPro) 5589 ) 5590 { 5591 if ((u & 0xFFFFFF00) == 0) 5592 { 5593 L2: c.Iop--; /* to byte instruction */ 5594 c.Iflags &= ~CFopsize; 5595 goto L1; 5596 } 5597 if (((u & 0xFFFF00FF) == 0 || 5598 (shortop && (u & 0xFF) == 0)) && 5599 (rm < modregrm(3,0,0) || (!c.Irex && ereg < 4))) 5600 { 5601 L3: 5602 c.IEV2.Vuns >>= 8; 5603 if (rm >= (modregrm(3,0,AX) | reg)) 5604 c.Irm |= 4; /* AX.AH, BX.BH, etc. */ 5605 else 5606 c.IEV1.Voffset += 1; 5607 goto L2; 5608 } 5609 } 5610 5611 // BUG: which is right? 
5612 //else if ((u & 0xFFFF0000) == 0) 5613 5614 else if (0 && op == 0xF7 && 5615 rm >= modregrm(3,0,SP) && 5616 (u & 0xFFFF0000) == 0) 5617 5618 c.Iflags &= ~CFopsize; 5619 } 5620 5621 // Try to replace TEST reg,-1 with TEST reg,reg 5622 if (op == 0xF6 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7)) // TEST regL,immed8 5623 { 5624 if ((u & 0xFF) == 0xFF) 5625 { 5626 L4: 5627 c.Iop = 0x84; // TEST regL,regL 5628 c.Irm = modregrm(3,ereg,ereg); 5629 if (c.Irex & REX_B) 5630 c.Irex |= REX_R; 5631 c.Iflags &= ~CFopsize; 5632 goto L1; 5633 } 5634 } 5635 if (op == 0xF7 && rm >= modregrm(3,0,AX) && rm <= modregrm(3,0,7) && (I64 || ereg < 4)) 5636 { 5637 if (u == 0xFF) 5638 { 5639 if (ereg & 4) // SIL,DIL,BPL,SPL need REX prefix 5640 c.Irex |= REX; 5641 goto L4; 5642 } 5643 if ((u & 0xFFFF) == 0xFF00 && shortop && !c.Irex && ereg < 4) 5644 { 5645 ereg |= 4; /* to regH */ 5646 goto L4; 5647 } 5648 } 5649 5650 /* Look for sign extended immediate data */ 5651 if (cast(byte) u == u) 5652 { 5653 if (op == 0x81) 5654 { 5655 if (reg != 0x08 && reg != 0x20 && reg != 0x30) 5656 c.Iop = op = 0x83; /* 8 bit sgn ext */ 5657 } 5658 else if (op == 0x69) /* IMUL rw,ew,dw */ 5659 c.Iop = op = 0x6B; /* IMUL rw,ew,db */ 5660 } 5661 5662 // Look for SHIFT EA,imm8 we can replace with short form 5663 if (u == 1 && ((op & 0xFE) == 0xC0)) 5664 c.Iop |= 0xD0; 5665 5666 } /* if immediate second operand */ 5667 5668 /* Look for AX short form */ 5669 if (ins & A) 5670 { 5671 if (rm == modregrm(0,AX,local_BPRM) && 5672 !(c.Irex & REX_R) && // and it's AX, not R8 5673 (op & ~3) == 0x88 && 5674 !I64) 5675 { 5676 op = ((op & 3) + 0xA0) ^ 2; 5677 /* 8A. A0 */ 5678 /* 8B. A1 */ 5679 /* 88. A2 */ 5680 /* 89. A3 */ 5681 c.Iop = op; 5682 c.IFL2 = c.IFL1; 5683 c.IEV2 = c.IEV1; 5684 } 5685 5686 /* Replace MOV REG1,REG2 with MOV EREG1,EREG2 */ 5687 else if (!I16 && 5688 (op == 0x89 || op == 0x8B) && 5689 (rm & 0xC0) == 0xC0 && 5690 (!b || b.BC != BCasm) 5691 ) 5692 c.Iflags &= ~CFopsize; 5693 5694 // If rm is AX 5695 else if ((rm & modregrm(3,0,7)) == modregrm(3,0,AX) && !(c.Irex & (REX_R | REX_B))) 5696 { 5697 switch (op) 5698 { 5699 case 0x80: op = reg | 4; break; 5700 case 0x81: op = reg | 5; break; 5701 case 0x87: op = 0x90 + (reg>>3); break; // XCHG 5702 5703 case 0xF6: 5704 if (reg == 0) 5705 op = 0xA8; /* TEST AL,immed8 */ 5706 break; 5707 5708 case 0xF7: 5709 if (reg == 0) 5710 op = 0xA9; /* TEST AX,immed16 */ 5711 break; 5712 5713 default: 5714 break; 5715 } 5716 c.Iop = op; 5717 } 5718 } 5719 5720 /* Look for reg short form */ 5721 if ((ins & R) && (rm & 0xC0) == 0xC0) 5722 { 5723 switch (op) 5724 { 5725 case 0xC6: op = 0xB0 + ereg; break; 5726 case 0xC7: // if no sign extension 5727 if (!(c.Irex & REX_W && c.IEV2.Vint < 0)) 5728 { 5729 c.Irm = 0; 5730 c.Irex &= ~REX_W; 5731 op = 0xB8 + ereg; 5732 } 5733 break; 5734 5735 case 0xFF: 5736 switch (reg) 5737 { case 6<<3: op = 0x50+ereg; break;/* PUSH*/ 5738 case 0<<3: if (!I64) op = 0x40+ereg; break; /* INC*/ 5739 case 1<<3: if (!I64) op = 0x48+ereg; break; /* DEC*/ 5740 default: break; 5741 } 5742 break; 5743 5744 case 0x8F: op = 0x58 + ereg; break; 5745 case 0x87: 5746 if (reg == 0 && !(c.Irex & (REX_R | REX_B))) // Issue 12968: Needed to ensure it's referencing RAX, not R8 5747 op = 0x90 + ereg; 5748 break; 5749 5750 default: 5751 break; 5752 } 5753 c.Iop = op; 5754 } 5755 5756 // Look to remove redundant REX prefix on XOR 5757 if (c.Irex == REX_W // ignore ops involving R8..R15 5758 && (op == 0x31 || op == 0x33) // XOR 5759 && ((rm & 0xC0) == 0xC0) // register direct 5760 
&& ((reg >> 3) == ereg)) // register with itself 5761 { 5762 c.Irex = 0; 5763 } 5764 5765 // Look to replace SHL reg,1 with ADD reg,reg 5766 if ((op & ~1) == 0xD0 && 5767 (rm & modregrm(3,7,0)) == modregrm(3,4,0) && 5768 config.target_cpu >= TARGET_80486) 5769 { 5770 c.Iop &= 1; 5771 c.Irm = cast(ubyte)((rm & modregrm(3,0,7)) | (ereg << 3)); 5772 if (c.Irex & REX_B) 5773 c.Irex |= REX_R; 5774 if (!(c.Iflags & CFpsw) && !I16) 5775 c.Iflags &= ~CFopsize; 5776 goto L1; 5777 } 5778 5779 /* Look for sign extended modregrm displacement, or 0 5780 * displacement. 5781 */ 5782 5783 if (((rm & 0xC0) == 0x80) && // it's a 16/32 bit disp 5784 c.IFL1 == FLconst) // and it's a constant 5785 { 5786 a = c.IEV1.Vpointer; 5787 if (a == 0 && (rm & 7) != local_BPRM && // if 0[disp] 5788 !(local_BPRM == 5 && (rm & 7) == 4 && (c.Isib & 7) == BP) 5789 ) 5790 c.Irm &= 0x3F; 5791 else if (!I16) 5792 { 5793 if (cast(targ_size_t)cast(targ_schar)a == a) 5794 c.Irm ^= 0xC0; /* do 8 sx */ 5795 } 5796 else if ((cast(targ_size_t)cast(targ_schar)a & 0xFFFF) == (a & 0xFFFF)) 5797 c.Irm ^= 0xC0; /* do 8 sx */ 5798 } 5799 5800 /* Look for LEA reg,[ireg], replace with MOV reg,ireg */ 5801 if (op == LEA) 5802 { 5803 rm = c.Irm & 7; 5804 mod = c.Irm & modregrm(3,0,0); 5805 if (mod == 0) 5806 { 5807 if (!I16) 5808 { 5809 switch (rm) 5810 { 5811 case 4: 5812 case 5: 5813 break; 5814 5815 default: 5816 c.Irm |= modregrm(3,0,0); 5817 c.Iop = 0x8B; 5818 break; 5819 } 5820 } 5821 else 5822 { 5823 switch (rm) 5824 { 5825 case 4: rm = modregrm(3,0,SI); goto L6; 5826 case 5: rm = modregrm(3,0,DI); goto L6; 5827 case 7: rm = modregrm(3,0,BX); goto L6; 5828 L6: c.Irm = cast(ubyte)(rm + reg); 5829 c.Iop = 0x8B; 5830 break; 5831 5832 default: 5833 break; 5834 } 5835 } 5836 } 5837 5838 /* replace LEA reg,0[BP] with MOV reg,BP */ 5839 else if (mod == modregrm(1,0,0) && rm == local_BPRM && 5840 c.IFL1 == FLconst && c.IEV1.Vpointer == 0) 5841 { 5842 c.Iop = 0x8B; /* MOV reg,BP */ 5843 c.Irm = cast(ubyte)(modregrm(3,0,BP) + reg); 5844 } 5845 } 5846 5847 // Replace [R13] with 0[R13] 5848 if (c.Irex & REX_B && ((c.Irm & modregrm(3,0,7)) == modregrm(0,0,BP) || 5849 issib(c.Irm) && (c.Irm & modregrm(3,0,0)) == 0 && (c.Isib & 7) == BP)) 5850 { 5851 c.Irm |= modregrm(1,0,0); 5852 c.IFL1 = FLconst; 5853 c.IEV1.Vpointer = 0; 5854 } 5855 } 5856 else if (!(c.Iflags & CFvex)) 5857 { 5858 switch (op) 5859 { 5860 default: 5861 // Look for MOV r64, immediate 5862 if ((c.Irex & REX_W) && (op & ~7) == 0xB8) 5863 { 5864 /* Look for zero extended immediate data */ 5865 if (c.IEV2.Vsize_t == c.IEV2.Vuns) 5866 { 5867 c.Irex &= ~REX_W; 5868 } 5869 /* Look for sign extended immediate data */ 5870 else if (c.IEV2.Vsize_t == c.IEV2.Vint) 5871 { 5872 c.Irm = modregrm(3,0,op & 7); 5873 c.Iop = op = 0xC7; 5874 c.IEV2.Vsize_t = c.IEV2.Vuns; 5875 } 5876 } 5877 if ((op & ~0x0F) != 0x70) 5878 break; 5879 goto case JMP; 5880 5881 case JMP: 5882 switch (c.IFL2) 5883 { 5884 case FLcode: 5885 if (c.IEV2.Vcode == code_next(c)) 5886 { 5887 c.Iop = NOP; 5888 continue; 5889 } 5890 break; 5891 5892 case FLblock: 5893 if (!code_next(c) && c.IEV2.Vblock == bn) 5894 { 5895 c.Iop = NOP; 5896 continue; 5897 } 5898 break; 5899 5900 case FLconst: 5901 case FLfunc: 5902 case FLextern: 5903 break; 5904 5905 default: 5906 WRFL(cast(FL)c.IFL2); 5907 assert(0); 5908 } 5909 break; 5910 5911 case 0x68: // PUSH immed16 5912 if (c.IFL2 == FLconst) 5913 { 5914 targ_long u = c.IEV2.Vuns; 5915 if (I64 || 5916 ((c.Iflags & CFopsize) ? 
I16 : I32)) 5917 { // PUSH 32/64 bit operand 5918 if (u == cast(byte) u) 5919 c.Iop = 0x6A; // PUSH immed8 5920 } 5921 else // PUSH 16 bit operand 5922 { 5923 if (cast(short)u == cast(byte) u) 5924 c.Iop = 0x6A; // PUSH immed8 5925 } 5926 } 5927 break; 5928 } 5929 } 5930 } 5931 5932 debug 5933 if (debugc) 5934 { 5935 printf("-pinholeopt(%p)\n",cstart); 5936 for (c = cstart; c; c = code_next(c)) 5937 code_print(c); 5938 } 5939 } 5940 5941 5942 debug 5943 { 5944 private void pinholeopt_unittest() 5945 { 5946 //printf("pinholeopt_unittest()\n"); 5947 static struct CS 5948 { 5949 uint model,op,ea; 5950 targ_size_t ev1,ev2; 5951 uint flags; 5952 } 5953 __gshared CS[2][22] tests = 5954 [ 5955 // XOR reg,immed NOT regL 5956 [ { 16,0x81,modregrm(3,6,BX),0,0xFF,0 }, { 0,0xF6,modregrm(3,2,BX),0,0xFF } ], 5957 5958 // MOV 0[BX],3 MOV [BX],3 5959 [ { 16,0xC7,modregrm(2,0,7),0,3 }, { 0,0xC7,modregrm(0,0,7),0,3 } ], 5960 5961 /+ // only if config.flags4 & CFG4space 5962 // TEST regL,immed8 5963 [ { 0,0xF6,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }], 5964 [ { 0,0xF7,modregrm(3,0,BX),0,0xFF,0 }, { 0,0x84,modregrm(3,BX,BX),0,0xFF }], 5965 [ { 64,0xF6,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }], 5966 [ { 64,0xF7,modregrmx(3,0,R8),0,0xFF,0 }, { 0,0x84,modregxrmx(3,R8,R8),0,0xFF }], 5967 +/ 5968 5969 // PUSH immed => PUSH immed8 5970 [ { 0,0x68,0,0,0 }, { 0,0x6A,0,0,0 }], 5971 [ { 0,0x68,0,0,0x7F }, { 0,0x6A,0,0,0x7F }], 5972 [ { 0,0x68,0,0,0x80 }, { 0,0x68,0,0,0x80 }], 5973 [ { 16,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }], 5974 [ { 16,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }], 5975 [ { 16,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }], 5976 [ { 16,0x68,0,0,0x10000,0 }, { 0,0x6A,0,0,0x10000,0 }], 5977 [ { 16,0x68,0,0,0x10000,CFopsize }, { 0,0x68,0,0,0x10000,CFopsize }], 5978 [ { 32,0x68,0,0,0,CFopsize }, { 0,0x6A,0,0,0,CFopsize }], 5979 [ { 32,0x68,0,0,0x7F,CFopsize }, { 0,0x6A,0,0,0x7F,CFopsize }], 5980 [ { 32,0x68,0,0,0x80,CFopsize }, { 0,0x68,0,0,0x80,CFopsize }], 5981 [ { 32,0x68,0,0,0x10000,CFopsize }, { 0,0x6A,0,0,0x10000,CFopsize }], 5982 [ { 32,0x68,0,0,0x8000,CFopsize }, { 0,0x68,0,0,0x8000,CFopsize }], 5983 5984 // clear r64, for r64 != R8..R15 5985 [ { 64,0x31,0x800C0,0,0,0 }, { 0,0x31,0xC0,0,0,0}], 5986 [ { 64,0x33,0x800C0,0,0,0 }, { 0,0x33,0xC0,0,0,0}], 5987 5988 // MOV r64, immed 5989 [ { 64,0xC7,0x800C0,0,0xFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,0xFFFFFFFF,0}], 5990 [ { 64,0xC7,0x800C0,0,0x7FFFFFFF,0 }, { 0,0xB8,0,0,0x7FFFFFFF,0}], 5991 [ { 64,0xB8,0x80000,0,0xFFFFFFFF,0 }, { 0,0xB8,0,0,0xFFFFFFFF,0 }], 5992 [ { 64,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }, { 0,0xB8,0x80000,0,cast(targ_size_t)0x1FFFFFFFF,0 }], 5993 [ { 64,0xB8,0x80000,0,cast(targ_size_t)0xFFFFFFFFFFFFFFFF,0 }, { 0,0xC7,0x800C0,0,cast(targ_size_t)0xFFFFFFFF,0}], 5994 ]; 5995 5996 //config.flags4 |= CFG4space; 5997 for (int i = 0; i < tests.length; i++) 5998 { CS *pin = &tests[i][0]; 5999 CS *pout = &tests[i][1]; 6000 code cs = void; 6001 memset(&cs, 0, cs.sizeof); 6002 if (pin.model) 6003 { 6004 if (I16 && pin.model != 16) 6005 continue; 6006 if (I32 && pin.model != 32) 6007 continue; 6008 if (I64 && pin.model != 64) 6009 continue; 6010 } 6011 //printf("[%d]\n", i); 6012 cs.Iop = pin.op; 6013 cs.Iea = pin.ea; 6014 cs.IFL1 = FLconst; 6015 cs.IFL2 = FLconst; 6016 cs.IEV1.Vsize_t = pin.ev1; 6017 cs.IEV2.Vsize_t = pin.ev2; 6018 cs.Iflags = pin.flags; 6019 pinholeopt(&cs, null); 6020 if (cs.Iop != pout.op) 6021 { printf("[%d] Iop = x%02x, pout = x%02x\n", 
i, cs.Iop, pout.op);
6022 assert(0);
6023 }
6024 assert(cs.Iea == pout.ea);
6025 assert(cs.IEV1.Vsize_t == pout.ev1);
6026 assert(cs.IEV2.Vsize_t == pout.ev2);
6027 assert(cs.Iflags == pout.flags);
6028 }
6029 }
6030 }
6031
6032 void simplify_code(code* c)
6033 {
6034 reg_t reg;
6035 if (config.flags4 & CFG4optimized &&
6036 (c.Iop == 0x81 || c.Iop == 0x80) &&
6037 c.IFL2 == FLconst &&
6038 reghasvalue((c.Iop == 0x80) ? BYTEREGS : ALLREGS,I64 ? c.IEV2.Vsize_t : c.IEV2.Vlong,&reg) &&
6039 !(I16 && c.Iflags & CFopsize)
6040 )
6041 {
6042 // See if we can replace immediate instruction with register instruction
6043 static immutable ubyte[8] regop =
6044 [ 0x00,0x08,0x10,0x18,0x20,0x28,0x30,0x38 ];
6045
6046 //printf("replacing 0x%02x, val = x%lx\n",c.Iop,c.IEV2.Vlong);
6047 c.Iop = regop[(c.Irm & modregrm(0,7,0)) >> 3] | (c.Iop & 1);
6048 code_newreg(c, reg);
6049 if (I64 && !(c.Iop & 1) && (reg & 4))
6050 c.Irex |= REX;
6051 }
6052 }
6053
6054 /**************************
6055 * Compute jump addresses for FLcode.
6056 * Note: only works for forward referenced code.
6057 * only direct jumps and branches are detected.
6058 * LOOP instructions only work for backward refs.
6059 */
6060
6061 void jmpaddr(code *c)
6062 {
6063 code* ci,cn,ctarg,cstart;
6064 targ_size_t ad;
6065
6066 //printf("jmpaddr()\n");
6067 cstart = c; /* remember start of code */
6068 while (c)
6069 {
6070 const op = c.Iop;
6071 if (op <= 0xEB &&
6072 inssize[op] & T && // if second operand
6073 c.IFL2 == FLcode &&
6074 ((op & ~0x0F) == 0x70 || op == JMP || op == JMPS || op == JCXZ || op == CALL))
6075 {
6076 ci = code_next(c);
6077 ctarg = c.IEV2.Vcode; /* target code */
6078 ad = 0; /* IP displacement */
6079 while (ci && ci != ctarg)
6080 {
6081 ad += calccodsize(ci);
6082 ci = code_next(ci);
6083 }
6084 if (!ci)
6085 goto Lbackjmp; // couldn't find it
6086 if (!I16 || op == JMP || op == JMPS || op == JCXZ || op == CALL)
6087 c.IEV2.Vpointer = ad;
6088 else /* else conditional */
6089 {
6090 if (!(c.Iflags & CFjmp16)) /* if branch */
6091 c.IEV2.Vpointer = ad;
6092 else /* branch around a long jump */
6093 {
6094 cn = code_next(c);
6095 c.next = code_calloc();
6096 code_next(c).next = cn;
6097 c.Iop = op ^ 1; /* converse jmp */
6098 c.Iflags &= ~CFjmp16;
6099 c.IEV2.Vpointer = I16 ? 3 : 5;
6100 cn = code_next(c);
6101 cn.Iop = JMP; /* long jump */
6102 cn.IFL2 = FLconst;
6103 cn.IEV2.Vpointer = ad;
6104 }
6105 }
6106 c.IFL2 = FLconst;
6107 }
6108 if (op == LOOP && c.IFL2 == FLcode) /* backwards refs */
6109 {
6110 Lbackjmp:
6111 ctarg = c.IEV2.Vcode;
6112 for (ci = cstart; ci != ctarg; ci = code_next(ci))
6113 if (!ci || ci == c)
6114 assert(0);
6115 ad = 2; /* - IP displacement */
6116 while (ci != c)
6117 {
6118 assert(ci);
6119 ad += calccodsize(ci);
6120 ci = code_next(ci);
6121 }
6122 c.IEV2.Vpointer = (-ad) & 0xFF;
6123 c.IFL2 = FLconst;
6124 }
6125 c = code_next(c);
6126 }
6127 }
6128
6129 /*******************************
6130 * Calculate bl.Bsize.
6131 */
6132
6133 uint calcblksize(code *c)
6134 {
6135 uint size;
6136 for (size = 0; c; c = code_next(c))
6137 {
6138 uint sz = calccodsize(c);
6139 //printf("off=%02x, sz = %d, code %p: op=%02x\n", size, sz, c, c.Iop);
6140 size += sz;
6141 }
6142 //printf("calcblksize(c = x%x) = %d\n", c, size);
6143 return size;
6144 }
6145
6146 /*****************************
6147 * Calculate and return code size of a code.
6148 * Note that NOPs are sometimes used as markers, but are
6149 * never output. LINNUMs are never output.
6150 * Note: This routine must be fast.
Profiling shows it is significant. 6151 */ 6152 6153 uint calccodsize(code *c) 6154 { 6155 uint size; 6156 ubyte rm,mod,ins; 6157 uint iflags; 6158 uint i32 = I32 || I64; 6159 uint a32 = i32; 6160 6161 debug 6162 assert((a32 & ~1) == 0); 6163 6164 iflags = c.Iflags; 6165 opcode_t op = c.Iop; 6166 if (iflags & CFvex && c.Ivex.pfx == 0xC4) 6167 { 6168 ins = vex_inssize(c); 6169 size = ins & 7; 6170 goto Lmodrm; 6171 } 6172 else if ((op & 0xFF00) == 0x0F00 || (op & 0xFFFD00) == 0x0F3800) 6173 op = 0x0F; 6174 else 6175 op &= 0xFF; 6176 switch (op) 6177 { 6178 case 0x0F: 6179 if ((c.Iop & 0xFFFD00) == 0x0F3800) 6180 { // 3 byte op ( 0F38-- or 0F3A-- ) 6181 ins = inssize2[(c.Iop >> 8) & 0xFF]; 6182 size = ins & 7; 6183 if (c.Iop & 0xFF000000) 6184 size++; 6185 } 6186 else 6187 { // 2 byte op ( 0F-- ) 6188 ins = inssize2[c.Iop & 0xFF]; 6189 size = ins & 7; 6190 if (c.Iop & 0xFF0000) 6191 size++; 6192 } 6193 break; 6194 6195 case NOP: 6196 case ESCAPE: 6197 size = 0; // since these won't be output 6198 goto Lret2; 6199 6200 case ASM: 6201 if (c.Iflags == CFaddrsize) // kludge for DA inline asm 6202 size = _tysize[TYnptr]; 6203 else 6204 size = cast(uint)c.IEV1.len; 6205 goto Lret2; 6206 6207 case 0xA1: 6208 case 0xA3: 6209 if (c.Irex) 6210 { 6211 size = 9; // 64 bit immediate value for MOV to/from RAX 6212 goto Lret; 6213 } 6214 goto Ldefault; 6215 6216 case 0xF6: /* TEST mem8,immed8 */ 6217 ins = inssize[op]; 6218 size = ins & 7; 6219 if (i32) 6220 size = inssize32[op]; 6221 if ((c.Irm & (7<<3)) == 0) 6222 size++; /* size of immed8 */ 6223 break; 6224 6225 case 0xF7: 6226 ins = inssize[op]; 6227 size = ins & 7; 6228 if (i32) 6229 size = inssize32[op]; 6230 if ((c.Irm & (7<<3)) == 0) 6231 size += (i32 ^ ((iflags & CFopsize) !=0)) ? 4 : 2; 6232 break; 6233 6234 default: 6235 Ldefault: 6236 ins = inssize[op]; 6237 size = ins & 7; 6238 if (i32) 6239 size = inssize32[op]; 6240 } 6241 6242 if (iflags & (CFwait | CFopsize | CFaddrsize | CFSEG)) 6243 { 6244 if (iflags & CFwait) // if add FWAIT prefix 6245 size++; 6246 if (iflags & CFSEG) // if segment override 6247 size++; 6248 6249 // If the instruction has a second operand that is not an 8 bit, 6250 // and the operand size prefix is present, then fix the size computation 6251 // because the operand size will be different. 6252 // Walter, I had problems with this bit at the end. There can still be 6253 // an ADDRSIZE prefix for these and it does indeed change the operand size. 6254 6255 if (iflags & (CFopsize | CFaddrsize)) 6256 { 6257 if ((ins & (T|E)) == T) 6258 { 6259 if ((op & 0xAC) == 0xA0) 6260 { 6261 if (iflags & CFaddrsize && !I64) 6262 { if (I32) 6263 size -= 2; 6264 else 6265 size += 2; 6266 } 6267 } 6268 else if (iflags & CFopsize) 6269 { if (I16) 6270 size += 2; 6271 else 6272 size -= 2; 6273 } 6274 } 6275 if (iflags & CFaddrsize) 6276 { if (!I64) 6277 a32 ^= 1; 6278 size++; 6279 } 6280 if (iflags & CFopsize) 6281 size++; /* +1 for OPSIZE prefix */ 6282 } 6283 } 6284 6285 Lmodrm: 6286 if ((op & ~0x0F) == 0x70) 6287 { 6288 if (iflags & CFjmp16) // if long branch 6289 size += I16 ? 3 : 4; // + 3(4) bytes for JMP 6290 } 6291 else if (ins & M) // if modregrm byte 6292 { 6293 rm = c.Irm; 6294 mod = rm & 0xC0; 6295 if (a32 || I64) 6296 { // 32 bit addressing 6297 if (issib(rm)) 6298 size++; 6299 switch (mod) 6300 { case 0: 6301 if (issib(rm) && (c.Isib & 7) == 5 || 6302 (rm & 7) == 5) 6303 size += 4; /* disp32 */ 6304 if (c.Irex & REX_B && (rm & 7) == 5) 6305 /* Instead of selecting R13, this mode is an [RIP] relative 6306 * address. 
Although valid, it's redundant, and should not 6307 * be generated. Instead, generate 0[R13] instead of [R13]. 6308 */ 6309 assert(0); 6310 break; 6311 6312 case 0x40: 6313 size++; /* disp8 */ 6314 break; 6315 6316 case 0x80: 6317 size += 4; /* disp32 */ 6318 break; 6319 6320 default: 6321 break; 6322 } 6323 } 6324 else 6325 { // 16 bit addressing 6326 if (mod == 0x40) /* 01: 8 bit displacement */ 6327 size++; 6328 else if (mod == 0x80 || (mod == 0 && (rm & 7) == 6)) 6329 size += 2; 6330 } 6331 } 6332 6333 Lret: 6334 if (!(iflags & CFvex) && c.Irex) 6335 { 6336 size++; 6337 if (c.Irex & REX_W && (op & ~7) == 0xB8) 6338 size += 4; 6339 } 6340 Lret2: 6341 //printf("op = x%02x, size = %d\n",op,size); 6342 return size; 6343 } 6344 6345 /******************************** 6346 * Return !=0 if codes match. 6347 */ 6348 6349 static if (0) 6350 { 6351 6352 int code_match(code *c1,code *c2) 6353 { 6354 code cs1,cs2; 6355 ubyte ins; 6356 6357 if (c1 == c2) 6358 goto match; 6359 cs1 = *c1; 6360 cs2 = *c2; 6361 if (cs1.Iop != cs2.Iop) 6362 goto nomatch; 6363 switch (cs1.Iop) 6364 { 6365 case ESCAPE | ESCctor: 6366 case ESCAPE | ESCdtor: 6367 goto nomatch; 6368 6369 case NOP: 6370 goto match; 6371 6372 case ASM: 6373 if (cs1.IEV1.len == cs2.IEV1.len && 6374 memcmp(cs1.IEV1.bytes,cs2.IEV1.bytes,cs1.EV1.len) == 0) 6375 goto match; 6376 else 6377 goto nomatch; 6378 6379 default: 6380 if ((cs1.Iop & 0xFF) == ESCAPE) 6381 goto match; 6382 break; 6383 } 6384 if (cs1.Iflags != cs2.Iflags) 6385 goto nomatch; 6386 6387 ins = inssize[cs1.Iop & 0xFF]; 6388 if ((cs1.Iop & 0xFFFD00) == 0x0F3800) 6389 { 6390 ins = inssize2[(cs1.Iop >> 8) & 0xFF]; 6391 } 6392 else if ((cs1.Iop & 0xFF00) == 0x0F00) 6393 { 6394 ins = inssize2[cs1.Iop & 0xFF]; 6395 } 6396 6397 if (ins & M) // if modregrm byte 6398 { 6399 if (cs1.Irm != cs2.Irm) 6400 goto nomatch; 6401 if ((cs1.Irm & 0xC0) == 0xC0) 6402 goto do2; 6403 if (is32bitaddr(I32,cs1.Iflags)) 6404 { 6405 if (issib(cs1.Irm) && cs1.Isib != cs2.Isib) 6406 goto nomatch; 6407 if ( 6408 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5)) 6409 ) 6410 goto do2; /* if no first operand */ 6411 } 6412 else 6413 { 6414 if ( 6415 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 6416 ) 6417 goto do2; /* if no first operand */ 6418 } 6419 if (cs1.IFL1 != cs2.IFL1) 6420 goto nomatch; 6421 if (flinsymtab[cs1.IFL1] && cs1.IEV1.Vsym != cs2.IEV1.Vsym) 6422 goto nomatch; 6423 if (cs1.IEV1.Voffset != cs2.IEV1.Voffset) 6424 goto nomatch; 6425 } 6426 6427 do2: 6428 if (!(ins & T)) // if no second operand 6429 goto match; 6430 if (cs1.IFL2 != cs2.IFL2) 6431 goto nomatch; 6432 if (flinsymtab[cs1.IFL2] && cs1.IEV2.Vsym != cs2.IEV2.Vsym) 6433 goto nomatch; 6434 if (cs1.IEV2.Voffset != cs2.IEV2.Voffset) 6435 goto nomatch; 6436 6437 match: 6438 return 1; 6439 6440 nomatch: 6441 return 0; 6442 } 6443 6444 } 6445 6446 /************************** 6447 * Write code to intermediate file. 6448 * Code starts at offset. 
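 * Bytes are accumulated in a small fixed-size buffer (MiniCodeBuf, below)
 * and flushed to the object module in chunks via objmod.bytes().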
6449 * Returns: 6450 * addr of end of code 6451 */ 6452 6453 private struct MiniCodeBuf 6454 { 6455 nothrow: 6456 size_t index; 6457 size_t offset; 6458 int seg; 6459 char[100] bytes; // = void; 6460 6461 this(int seg) 6462 { 6463 index = 0; 6464 this.offset = cast(size_t)Offset(seg); 6465 this.seg = seg; 6466 } 6467 6468 void flushx() 6469 { 6470 // Emit accumulated bytes to code segment 6471 debug assert(index < bytes.length); 6472 offset += objmod.bytes(seg, offset, cast(uint)index, bytes.ptr); 6473 index = 0; 6474 } 6475 6476 void gen(char c) { bytes[index++] = c; } 6477 6478 void genp(size_t n, void *p) { memcpy(&bytes[index], p, n); index += n; } 6479 6480 void flush() { if (index) flushx(); } 6481 6482 uint getOffset() { return cast(uint)(offset + index); } 6483 6484 uint available() { return cast(uint)(bytes.sizeof - index); } 6485 } 6486 6487 private void do8bit(MiniCodeBuf *pbuf, FL, evc *); 6488 private void do16bit(MiniCodeBuf *pbuf, FL, evc *,int); 6489 private void do32bit(MiniCodeBuf *pbuf, FL, evc *,int,int = 0); 6490 private void do64bit(MiniCodeBuf *pbuf, FL, evc *,int); 6491 6492 uint codout(int seg, code *c) 6493 { 6494 ubyte rm,mod; 6495 ubyte ins; 6496 code *cn; 6497 uint flags; 6498 Symbol *s; 6499 6500 debug 6501 if (debugc) printf("codout(%p), Coffset = x%llx\n",c,cast(ulong)Offset(seg)); 6502 6503 MiniCodeBuf ggen = void; 6504 ggen.index = 0; 6505 ggen.offset = cast(size_t)Offset(seg); 6506 ggen.seg = seg; 6507 6508 for (; c; c = code_next(c)) 6509 { 6510 debug 6511 { 6512 if (debugc) { printf("off=%02u, sz=%u, ", ggen.getOffset(), calccodsize(c)); code_print(c); } 6513 uint startoffset = ggen.getOffset(); 6514 } 6515 6516 opcode_t op = c.Iop; 6517 ins = inssize[op & 0xFF]; 6518 switch (op & 0xFF) 6519 { 6520 case ESCAPE: 6521 /* Check for SSE4 opcode v/pmaxuw xmm1,xmm2/m128 */ 6522 if(op == 0x660F383E || c.Iflags & CFvex) break; 6523 6524 switch (op & 0xFFFF00) 6525 { case ESClinnum: 6526 /* put out line number stuff */ 6527 objmod.linnum(c.IEV1.Vsrcpos,seg,ggen.getOffset()); 6528 break; 6529 version (SCPP) 6530 { 6531 static if (1) 6532 { 6533 case ESCctor: 6534 case ESCdtor: 6535 case ESCoffset: 6536 if (config.exe != EX_WIN32) 6537 except_pair_setoffset(c,ggen.getOffset() - funcoffset); 6538 break; 6539 6540 case ESCmark: 6541 case ESCrelease: 6542 case ESCmark2: 6543 case ESCrelease2: 6544 break; 6545 } 6546 else 6547 { 6548 case ESCctor: 6549 except_push(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null); 6550 break; 6551 6552 case ESCdtor: 6553 except_pop(ggen.getOffset() - funcoffset,c.IEV1.Vtor,null); 6554 break; 6555 6556 case ESCmark: 6557 except_mark(); 6558 break; 6559 6560 case ESCrelease: 6561 except_release(); 6562 break; 6563 } 6564 } 6565 case ESCadjesp: 6566 //printf("adjust ESP %ld\n", (long)c.IEV1.Vint); 6567 break; 6568 6569 default: 6570 break; 6571 } 6572 6573 debug 6574 assert(calccodsize(c) == 0); 6575 6576 continue; 6577 6578 case NOP: /* don't send them out */ 6579 if (op != NOP) 6580 break; 6581 debug 6582 assert(calccodsize(c) == 0); 6583 6584 continue; 6585 6586 case ASM: 6587 if (op != ASM) 6588 break; 6589 ggen.flush(); 6590 if (c.Iflags == CFaddrsize) // kludge for DA inline asm 6591 { 6592 do32bit(&ggen, FLblockoff,&c.IEV1,0); 6593 } 6594 else 6595 { 6596 ggen.offset += objmod.bytes(seg,ggen.offset,cast(uint)c.IEV1.len,c.IEV1.bytes); 6597 } 6598 debug 6599 assert(calccodsize(c) == c.IEV1.len); 6600 6601 continue; 6602 6603 default: 6604 break; 6605 } 6606 flags = c.Iflags; 6607 6608 // See if we need to flush (don't have room for 
largest code sequence) 6609 if (ggen.available() < (1+4+4+8+8)) 6610 ggen.flush(); 6611 6612 // see if we need to put out prefix bytes 6613 if (flags & (CFwait | CFPREFIX | CFjmp16)) 6614 { 6615 int override_; 6616 6617 if (flags & CFwait) 6618 ggen.gen(0x9B); // FWAIT 6619 /* ? SEGES : SEGSS */ 6620 switch (flags & CFSEG) 6621 { case CFes: override_ = SEGES; goto segover; 6622 case CFss: override_ = SEGSS; goto segover; 6623 case CFcs: override_ = SEGCS; goto segover; 6624 case CFds: override_ = SEGDS; goto segover; 6625 case CFfs: override_ = SEGFS; goto segover; 6626 case CFgs: override_ = SEGGS; goto segover; 6627 segover: ggen.gen(cast(ubyte)override_); 6628 break; 6629 6630 default: break; 6631 } 6632 6633 if (flags & CFaddrsize) 6634 ggen.gen(0x67); 6635 6636 // Do this last because of instructions like ADDPD 6637 if (flags & CFopsize) 6638 ggen.gen(0x66); /* operand size */ 6639 6640 if ((op & ~0x0F) == 0x70 && flags & CFjmp16) /* long condit jmp */ 6641 { 6642 if (!I16) 6643 { // Put out 16 bit conditional jump 6644 c.Iop = op = 0x0F00 | (0x80 | (op & 0x0F)); 6645 } 6646 else 6647 { 6648 cn = code_calloc(); 6649 /*cxcalloc++;*/ 6650 cn.next = code_next(c); 6651 c.next= cn; // link into code 6652 cn.Iop = JMP; // JMP block 6653 cn.IFL2 = c.IFL2; 6654 cn.IEV2.Vblock = c.IEV2.Vblock; 6655 c.Iop = op ^= 1; // toggle condition 6656 c.IFL2 = FLconst; 6657 c.IEV2.Vpointer = I16 ? 3 : 5; // skip over JMP block 6658 c.Iflags &= ~CFjmp16; 6659 } 6660 } 6661 } 6662 6663 if (flags & CFvex) 6664 { 6665 if (flags & CFvex3) 6666 { 6667 ggen.gen(0xC4); 6668 ggen.gen(cast(ubyte)VEX3_B1(c.Ivex)); 6669 ggen.gen(cast(ubyte)VEX3_B2(c.Ivex)); 6670 ggen.gen(c.Ivex.op); 6671 } 6672 else 6673 { 6674 ggen.gen(0xC5); 6675 ggen.gen(cast(ubyte)VEX2_B1(c.Ivex)); 6676 ggen.gen(c.Ivex.op); 6677 } 6678 ins = vex_inssize(c); 6679 goto Lmodrm; 6680 } 6681 6682 if (op > 0xFF) 6683 { 6684 if ((op & 0xFFFD00) == 0x0F3800) 6685 ins = inssize2[(op >> 8) & 0xFF]; 6686 else if ((op & 0xFF00) == 0x0F00) 6687 ins = inssize2[op & 0xFF]; 6688 6689 if (op & 0xFF000000) 6690 { 6691 ubyte op1 = op >> 24; 6692 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66) 6693 { 6694 ggen.gen(op1); 6695 if (c.Irex) 6696 ggen.gen(c.Irex | REX); 6697 } 6698 else 6699 { 6700 if (c.Irex) 6701 ggen.gen(c.Irex | REX); 6702 ggen.gen(op1); 6703 } 6704 ggen.gen((op >> 16) & 0xFF); 6705 ggen.gen((op >> 8) & 0xFF); 6706 ggen.gen(op & 0xFF); 6707 } 6708 else if (op & 0xFF0000) 6709 { 6710 ubyte op1 = cast(ubyte)(op >> 16); 6711 if (op1 == 0xF2 || op1 == 0xF3 || op1 == 0x66) 6712 { 6713 ggen.gen(op1); 6714 if (c.Irex) 6715 ggen.gen(c.Irex | REX); 6716 } 6717 else 6718 { 6719 if (c.Irex) 6720 ggen.gen(c.Irex | REX); 6721 ggen.gen(op1); 6722 } 6723 ggen.gen((op >> 8) & 0xFF); 6724 ggen.gen(op & 0xFF); 6725 } 6726 else 6727 { 6728 if (c.Irex) 6729 ggen.gen(c.Irex | REX); 6730 ggen.gen((op >> 8) & 0xFF); 6731 ggen.gen(op & 0xFF); 6732 } 6733 } 6734 else 6735 { 6736 if (c.Irex) 6737 ggen.gen(c.Irex | REX); 6738 ggen.gen(cast(ubyte)op); 6739 } 6740 Lmodrm: 6741 if (ins & M) /* if modregrm byte */ 6742 { 6743 rm = c.Irm; 6744 ggen.gen(rm); 6745 6746 // Look for an address size override when working with the 6747 // MOD R/M and SIB bytes 6748 6749 if (is32bitaddr( I32, flags)) 6750 { 6751 if (issib(rm)) 6752 ggen.gen(c.Isib); 6753 switch (rm & 0xC0) 6754 { 6755 case 0x40: 6756 do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1); // 8 bit 6757 break; 6758 6759 case 0: 6760 if (!(issib(rm) && (c.Isib & 7) == 5 || 6761 (rm & 7) == 5)) 6762 break; 6763 goto case 0x80; 6764 6765 case 
0x80: 6766 { 6767 int cfflags = CFoff; 6768 targ_size_t val = 0; 6769 if (I64) 6770 { 6771 if ((rm & modregrm(3,0,7)) == modregrm(0,0,5)) // if disp32[RIP] 6772 { 6773 cfflags |= CFpc32; 6774 val = -4; 6775 reg_t reg = rm & modregrm(0,7,0); 6776 if (ins & T || 6777 ((op == 0xF6 || op == 0xF7) && (reg == modregrm(0,0,0) || reg == modregrm(0,1,0)))) 6778 { if (ins & E || op == 0xF6) 6779 val = -5; 6780 else if (c.Iflags & CFopsize) 6781 val = -6; 6782 else 6783 val = -8; 6784 } 6785 static if (TARGET_OSX || TARGET_WINDOS) 6786 { 6787 /* Mach-O and Win64 fixups already take the 4 byte size 6788 * into account, so bias by 4 6789 ` */ 6790 val += 4; 6791 } 6792 } 6793 } 6794 do32bit(&ggen, cast(FL)c.IFL1,&c.IEV1,cfflags,cast(int)val); 6795 break; 6796 } 6797 6798 default: 6799 break; 6800 } 6801 } 6802 else 6803 { 6804 switch (rm & 0xC0) 6805 { case 0x40: 6806 do8bit(&ggen, cast(FL) c.IFL1,&c.IEV1); // 8 bit 6807 break; 6808 6809 case 0: 6810 if ((rm & 7) != 6) 6811 break; 6812 goto case 0x80; 6813 6814 case 0x80: 6815 do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,CFoff); 6816 break; 6817 6818 default: 6819 break; 6820 } 6821 } 6822 } 6823 else 6824 { 6825 if (op == 0xC8) 6826 do16bit(&ggen, cast(FL)c.IFL1,&c.IEV1,0); 6827 } 6828 flags &= CFseg | CFoff | CFselfrel; 6829 if (ins & T) /* if second operand */ 6830 { 6831 if (ins & E) /* if data-8 */ 6832 do8bit(&ggen, cast(FL) c.IFL2,&c.IEV2); 6833 else if (!I16) 6834 { 6835 switch (op) 6836 { 6837 case 0xC2: /* RETN imm16 */ 6838 case 0xCA: /* RETF imm16 */ 6839 do16: 6840 do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 6841 break; 6842 6843 case 0xA1: 6844 case 0xA3: 6845 if (I64 && c.Irex) 6846 { 6847 do64: 6848 do64bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 6849 break; 6850 } 6851 goto case 0xA0; 6852 6853 case 0xA0: /* MOV AL,byte ptr [] */ 6854 case 0xA2: 6855 if (c.Iflags & CFaddrsize && !I64) 6856 goto do16; 6857 else 6858 do32: 6859 do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 6860 break; 6861 6862 case 0x9A: 6863 case 0xEA: 6864 if (c.Iflags & CFopsize) 6865 goto ptr1616; 6866 else 6867 goto ptr1632; 6868 6869 case 0x68: // PUSH immed32 6870 if (cast(FL)c.IFL2 == FLblock) 6871 { 6872 c.IFL2 = FLblockoff; 6873 goto do32; 6874 } 6875 else 6876 goto case_default; 6877 6878 case CALL: // CALL rel 6879 case JMP: // JMP rel 6880 flags |= CFselfrel; 6881 goto case_default; 6882 6883 default: 6884 if ((op|0xF) == 0x0F8F) // Jcc rel16 rel32 6885 flags |= CFselfrel; 6886 if (I64 && (op & ~7) == 0xB8 && c.Irex & REX_W) 6887 goto do64; 6888 case_default: 6889 if (c.Iflags & CFopsize) 6890 goto do16; 6891 else 6892 goto do32; 6893 } 6894 } 6895 else 6896 { 6897 switch (op) 6898 { 6899 case 0xC2: 6900 case 0xCA: 6901 goto do16; 6902 6903 case 0xA0: 6904 case 0xA1: 6905 case 0xA2: 6906 case 0xA3: 6907 if (c.Iflags & CFaddrsize) 6908 goto do32; 6909 else 6910 goto do16; 6911 6912 case 0x9A: 6913 case 0xEA: 6914 if (c.Iflags & CFopsize) 6915 goto ptr1632; 6916 else 6917 goto ptr1616; 6918 6919 ptr1616: 6920 ptr1632: 6921 //assert(c.IFL2 == FLfunc); 6922 ggen.flush(); 6923 if (c.IFL2 == FLdatseg) 6924 { 6925 objmod.reftodatseg(seg,ggen.offset,c.IEV2.Vpointer, 6926 c.IEV2.Vseg,flags); 6927 ggen.offset += 4; 6928 } 6929 else 6930 { 6931 s = c.IEV2.Vsym; 6932 ggen.offset += objmod.reftoident(seg,ggen.offset,s,0,flags); 6933 } 6934 break; 6935 6936 case 0x68: // PUSH immed16 6937 if (cast(FL)c.IFL2 == FLblock) 6938 { c.IFL2 = FLblockoff; 6939 goto do16; 6940 } 6941 else 6942 goto case_default16; 6943 6944 case CALL: 6945 case JMP: 6946 flags |= CFselfrel; 6947 goto 
default; 6948 6949 default: 6950 case_default16: 6951 if (c.Iflags & CFopsize) 6952 goto do32; 6953 else 6954 goto do16; 6955 } 6956 } 6957 } 6958 else if (op == 0xF6) /* TEST mem8,immed8 */ 6959 { 6960 if ((rm & (7<<3)) == 0) 6961 do8bit(&ggen, cast(FL)c.IFL2,&c.IEV2); 6962 } 6963 else if (op == 0xF7) 6964 { 6965 if ((rm & (7<<3)) == 0) /* TEST mem16/32,immed16/32 */ 6966 { 6967 if ((I32 || I64) ^ ((c.Iflags & CFopsize) != 0)) 6968 do32bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 6969 else 6970 do16bit(&ggen, cast(FL)c.IFL2,&c.IEV2,flags); 6971 } 6972 } 6973 6974 debug 6975 if (ggen.getOffset() - startoffset != calccodsize(c)) 6976 { 6977 printf("actual: %d, calc: %d\n", cast(int)(ggen.getOffset() - startoffset), cast(int)calccodsize(c)); 6978 code_print(c); 6979 assert(0); 6980 } 6981 } 6982 ggen.flush(); 6983 Offset(seg) = ggen.offset; 6984 //printf("-codout(), Coffset = x%x\n", Offset(seg)); 6985 return cast(uint)ggen.offset; /* ending address */ 6986 } 6987 6988 6989 private void do64bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags) 6990 { 6991 char *p; 6992 Symbol *s; 6993 targ_size_t ad; 6994 6995 assert(I64); 6996 switch (fl) 6997 { 6998 case FLconst: 6999 ad = *cast(targ_size_t *) uev; 7000 L1: 7001 pbuf.genp(8,&ad); 7002 return; 7003 7004 case FLdatseg: 7005 pbuf.flush(); 7006 objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,CFoffset64 | flags); 7007 break; 7008 7009 case FLframehandler: 7010 framehandleroffset = pbuf.getOffset(); 7011 ad = 0; 7012 goto L1; 7013 7014 case FLswitch: 7015 pbuf.flush(); 7016 ad = uev.Vswitch.Btableoffset; 7017 if (config.flags & CFGromable) 7018 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7019 else 7020 objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff); 7021 break; 7022 7023 case FLcsdata: 7024 case FLfardata: 7025 //symbol_print(uev.Vsym); 7026 // NOTE: In ELFOBJ all symbol refs have been tagged FLextern 7027 // strings and statics are treated like offsets from a 7028 // un-named external with is the start of .rodata or .data 7029 case FLextern: /* external data symbol */ 7030 case FLtlsdata: 7031 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 7032 { 7033 case FLgot: 7034 case FLgotoff: 7035 } 7036 pbuf.flush(); 7037 s = uev.Vsym; /* symbol pointer */ 7038 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,CFoffset64 | flags); 7039 break; 7040 7041 static if (TARGET_OSX) 7042 { 7043 case FLgot: 7044 funcsym_p.Slocalgotoffset = pbuf.getOffset(); 7045 ad = 0; 7046 goto L1; 7047 } 7048 7049 case FLfunc: /* function call */ 7050 s = uev.Vsym; /* symbol pointer */ 7051 assert(TARGET_SEGMENTED || !tyfarfunc(s.ty())); 7052 pbuf.flush(); 7053 objmod.reftoident(pbuf.seg,pbuf.offset,s,0,CFoffset64 | flags); 7054 break; 7055 7056 case FLblock: /* displacement to another block */ 7057 ad = uev.Vblock.Boffset - pbuf.getOffset() - 4; 7058 //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad); 7059 goto L1; 7060 7061 case FLblockoff: 7062 pbuf.flush(); 7063 assert(uev.Vblock); 7064 //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset); 7065 objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset); 7066 break; 7067 7068 default: 7069 WRFL(fl); 7070 assert(0); 7071 } 7072 pbuf.offset += 8; 7073 } 7074 7075 7076 private void do32bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags, int val) 7077 { 7078 char *p; 7079 Symbol 
*s; 7080 targ_size_t ad; 7081 7082 //printf("do32bit(flags = x%x)\n", flags); 7083 switch (fl) 7084 { 7085 case FLconst: 7086 assert(targ_size_t.sizeof == 4 || targ_size_t.sizeof == 8); 7087 ad = * cast(targ_size_t *) uev; 7088 L1: 7089 pbuf.genp(4,&ad); 7090 return; 7091 7092 case FLdatseg: 7093 pbuf.flush(); 7094 objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags); 7095 break; 7096 7097 case FLframehandler: 7098 framehandleroffset = pbuf.getOffset(); 7099 ad = 0; 7100 goto L1; 7101 7102 case FLswitch: 7103 pbuf.flush(); 7104 ad = uev.Vswitch.Btableoffset; 7105 if (config.flags & CFGromable) 7106 { 7107 static if (TARGET_OSX) 7108 { 7109 // These are magic values based on the exact code generated for the switch jump 7110 if (I64) 7111 uev.Vswitch.Btablebase = pbuf.getOffset() + 4; 7112 else 7113 uev.Vswitch.Btablebase = pbuf.getOffset() + 4 - 8; 7114 ad -= uev.Vswitch.Btablebase; 7115 goto L1; 7116 } 7117 else static if (TARGET_WINDOS) 7118 { 7119 if (I64) 7120 { 7121 uev.Vswitch.Btablebase = pbuf.getOffset() + 4; 7122 ad -= uev.Vswitch.Btablebase; 7123 goto L1; 7124 } 7125 else 7126 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7127 } 7128 else 7129 { 7130 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7131 } 7132 } 7133 else 7134 objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff); 7135 break; 7136 7137 case FLcode: 7138 //assert(JMPJMPTABLE); // the only use case 7139 pbuf.flush(); 7140 ad = *cast(targ_size_t *) uev + pbuf.getOffset(); 7141 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7142 break; 7143 7144 case FLcsdata: 7145 case FLfardata: 7146 //symbol_print(uev.Vsym); 7147 7148 // NOTE: In ELFOBJ all symbol refs have been tagged FLextern 7149 // strings and statics are treated like offsets from a 7150 // un-named external with is the start of .rodata or .data 7151 case FLextern: /* external data symbol */ 7152 case FLtlsdata: 7153 static if (TARGET_LINUX || TARGET_FREEBSD || TARGET_OPENBSD || TARGET_DRAGONFLYBSD || TARGET_SOLARIS) 7154 { 7155 case FLgot: 7156 case FLgotoff: 7157 } 7158 pbuf.flush(); 7159 s = uev.Vsym; /* symbol pointer */ 7160 if (TARGET_WINDOS && I64 && (flags & CFpc32)) 7161 { 7162 /* This is for those funky fixups where the location to be fixed up 7163 * is a 'val' amount back from the current RIP, biased by adding 4. 
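             * In the RIP-relative case built in codout(), the biased val works
             * out to 0, -1, -2 or -4: minus the number of immediate-operand
             * bytes that follow the disp32.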
7164 */ 7165 assert(val >= -5 && val <= 0); 7166 flags |= (-val & 7) << 24; // set CFREL value 7167 assert(CFREL == (7 << 24)); 7168 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags); 7169 } 7170 else 7171 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset + val,flags); 7172 break; 7173 7174 static if (TARGET_OSX) 7175 { 7176 case FLgot: 7177 funcsym_p.Slocalgotoffset = pbuf.getOffset(); 7178 ad = 0; 7179 goto L1; 7180 } 7181 7182 case FLfunc: /* function call */ 7183 s = uev.Vsym; /* symbol pointer */ 7184 if (tyfarfunc(s.ty())) 7185 { /* Large code references are always absolute */ 7186 pbuf.flush(); 7187 pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 4; 7188 } 7189 else if (s.Sseg == pbuf.seg && 7190 (s.Sclass == SCstatic || s.Sclass == SCglobal) && 7191 s.Sxtrnnum == 0 && flags & CFselfrel) 7192 { /* if we know it's relative address */ 7193 ad = s.Soffset - pbuf.getOffset() - 4; 7194 goto L1; 7195 } 7196 else 7197 { 7198 assert(TARGET_SEGMENTED || !tyfarfunc(s.ty())); 7199 pbuf.flush(); 7200 objmod.reftoident(pbuf.seg,pbuf.offset,s,val,flags); 7201 } 7202 break; 7203 7204 case FLblock: /* displacement to another block */ 7205 ad = uev.Vblock.Boffset - pbuf.getOffset() - 4; 7206 //printf("FLblock: funcoffset = %x, pbuf.getOffset = %x, Boffset = %x, ad = %x\n", funcoffset, pbuf.getOffset(), uev.Vblock.Boffset, ad); 7207 goto L1; 7208 7209 case FLblockoff: 7210 pbuf.flush(); 7211 assert(uev.Vblock); 7212 //printf("FLblockoff: offset = %x, Boffset = %x, funcoffset = %x\n", pbuf.offset, uev.Vblock.Boffset, funcoffset); 7213 objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset); 7214 break; 7215 7216 default: 7217 WRFL(fl); 7218 assert(0); 7219 } 7220 pbuf.offset += 4; 7221 } 7222 7223 7224 private void do16bit(MiniCodeBuf *pbuf, FL fl, evc *uev,int flags) 7225 { 7226 char *p; 7227 Symbol *s; 7228 targ_size_t ad; 7229 7230 switch (fl) 7231 { 7232 case FLconst: 7233 pbuf.genp(2,cast(char *) uev); 7234 return; 7235 7236 case FLdatseg: 7237 pbuf.flush(); 7238 objmod.reftodatseg(pbuf.seg,pbuf.offset,uev.Vpointer,uev.Vseg,flags); 7239 break; 7240 7241 case FLswitch: 7242 pbuf.flush(); 7243 ad = uev.Vswitch.Btableoffset; 7244 if (config.flags & CFGromable) 7245 objmod.reftocodeseg(pbuf.seg,pbuf.offset,ad); 7246 else 7247 objmod.reftodatseg(pbuf.seg,pbuf.offset,ad,objmod.jmpTableSegment(funcsym_p),CFoff); 7248 break; 7249 7250 case FLcsdata: 7251 case FLfardata: 7252 case FLextern: /* external data symbol */ 7253 case FLtlsdata: 7254 //assert(SIXTEENBIT || TARGET_SEGMENTED); 7255 pbuf.flush(); 7256 s = uev.Vsym; /* symbol pointer */ 7257 objmod.reftoident(pbuf.seg,pbuf.offset,s,uev.Voffset,flags); 7258 break; 7259 7260 case FLfunc: /* function call */ 7261 //assert(SIXTEENBIT || TARGET_SEGMENTED); 7262 s = uev.Vsym; /* symbol pointer */ 7263 if (tyfarfunc(s.ty())) 7264 { /* Large code references are always absolute */ 7265 pbuf.flush(); 7266 pbuf.offset += objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags) - 2; 7267 } 7268 else if (s.Sseg == pbuf.seg && 7269 (s.Sclass == SCstatic || s.Sclass == SCglobal) && 7270 s.Sxtrnnum == 0 && flags & CFselfrel) 7271 { /* if we know it's relative address */ 7272 ad = s.Soffset - pbuf.getOffset() - 2; 7273 goto L1; 7274 } 7275 else 7276 { 7277 pbuf.flush(); 7278 objmod.reftoident(pbuf.seg,pbuf.offset,s,0,flags); 7279 } 7280 break; 7281 7282 case FLblock: /* displacement to another block */ 7283 ad = uev.Vblock.Boffset - pbuf.getOffset() - 2; 7284 debug 7285 { 7286 targ_ptrdiff_t delta = uev.Vblock.Boffset - pbuf.getOffset() 
- 2; 7287 assert(cast(short)delta == delta); 7288 } 7289 L1: 7290 pbuf.genp(2,&ad); // displacement 7291 return; 7292 7293 case FLblockoff: 7294 pbuf.flush(); 7295 objmod.reftocodeseg(pbuf.seg,pbuf.offset,uev.Vblock.Boffset); 7296 break; 7297 7298 default: 7299 WRFL(fl); 7300 assert(0); 7301 } 7302 pbuf.offset += 2; 7303 } 7304 7305 7306 private void do8bit(MiniCodeBuf *pbuf, FL fl, evc *uev) 7307 { 7308 char c; 7309 targ_ptrdiff_t delta; 7310 7311 switch (fl) 7312 { 7313 case FLconst: 7314 c = cast(char)uev.Vuns; 7315 break; 7316 7317 case FLblock: 7318 delta = uev.Vblock.Boffset - pbuf.getOffset() - 1; 7319 if (cast(byte)delta != delta) 7320 { 7321 version (MARS) 7322 { 7323 if (uev.Vblock.Bsrcpos.Slinnum) 7324 printf("%s(%d): ", uev.Vblock.Bsrcpos.Sfilename, uev.Vblock.Bsrcpos.Slinnum); 7325 } 7326 printf("block displacement of %lld exceeds the maximum offset of -128 to 127.\n", cast(long)delta); 7327 err_exit(); 7328 } 7329 c = cast(char)delta; 7330 debug assert(uev.Vblock.Boffset > pbuf.getOffset() || c != 0x7F); 7331 break; 7332 7333 default: 7334 debug printf("fl = %d\n",fl); 7335 assert(0); 7336 } 7337 pbuf.gen(c); 7338 } 7339 7340 7341 /********************************** 7342 */ 7343 7344 version (SCPP) 7345 { 7346 static if (HYDRATE) 7347 { 7348 void code_hydrate(code **pc) 7349 { 7350 code *c; 7351 ubyte ins,rm; 7352 FL fl; 7353 7354 assert(pc); 7355 while (*pc) 7356 { 7357 c = cast(code *) ph_hydrate(cast(void**)pc); 7358 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 7359 ins = vex_inssize(c); 7360 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 7361 ins = inssize2[(c.Iop >> 8) & 0xFF]; 7362 else if ((c.Iop & 0xFF00) == 0x0F00) 7363 ins = inssize2[c.Iop & 0xFF]; 7364 else 7365 ins = inssize[c.Iop & 0xFF]; 7366 switch (c.Iop) 7367 { 7368 default: 7369 break; 7370 7371 case ESCAPE | ESClinnum: 7372 srcpos_hydrate(&c.IEV1.Vsrcpos); 7373 goto done; 7374 7375 case ESCAPE | ESCctor: 7376 case ESCAPE | ESCdtor: 7377 el_hydrate(&c.IEV1.Vtor); 7378 goto done; 7379 7380 case ASM: 7381 ph_hydrate(cast(void**)&c.IEV1.bytes); 7382 goto done; 7383 } 7384 if (!(ins & M) || 7385 ((rm = c.Irm) & 0xC0) == 0xC0) 7386 goto do2; /* if no first operand */ 7387 if (is32bitaddr(I32,c.Iflags)) 7388 { 7389 7390 if ( 7391 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5)) 7392 ) 7393 goto do2; /* if no first operand */ 7394 } 7395 else 7396 { 7397 if ( 7398 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 7399 ) 7400 goto do2; /* if no first operand */ 7401 } 7402 fl = cast(FL) c.IFL1; 7403 switch (fl) 7404 { 7405 case FLudata: 7406 case FLdata: 7407 case FLreg: 7408 case FLauto: 7409 case FLfast: 7410 case FLbprel: 7411 case FLpara: 7412 case FLcsdata: 7413 case FLfardata: 7414 case FLtlsdata: 7415 case FLfunc: 7416 case FLpseudo: 7417 case FLextern: 7418 assert(flinsymtab[fl]); 7419 symbol_hydrate(&c.IEV1.Vsym); 7420 symbol_debug(c.IEV1.Vsym); 7421 break; 7422 7423 case FLdatseg: 7424 case FLfltreg: 7425 case FLallocatmp: 7426 case FLcs: 7427 case FLndp: 7428 case FLoffset: 7429 case FLlocalsize: 7430 case FLconst: 7431 case FLframehandler: 7432 assert(!flinsymtab[fl]); 7433 break; 7434 7435 case FLcode: 7436 ph_hydrate(cast(void**)&c.IEV1.Vcode); 7437 break; 7438 7439 case FLblock: 7440 case FLblockoff: 7441 ph_hydrate(cast(void**)&c.IEV1.Vblock); 7442 break; 7443 version (SCPP) 7444 { 7445 case FLctor: 7446 case FLdtor: 7447 el_hydrate(cast(elem**)&c.IEV1.Vtor); 7448 break; 7449 } 7450 case FLasm: 7451 ph_hydrate(cast(void**)&c.IEV1.bytes); 7452 break; 7453 7454 default: 7455 WRFL(fl); 7456 
assert(0); 7457 } 7458 do2: 7459 /* Ignore TEST (F6 and F7) opcodes */ 7460 if (!(ins & T)) 7461 goto done; /* if no second operand */ 7462 7463 fl = cast(FL) c.IFL2; 7464 switch (fl) 7465 { 7466 case FLudata: 7467 case FLdata: 7468 case FLreg: 7469 case FLauto: 7470 case FLfast: 7471 case FLbprel: 7472 case FLpara: 7473 case FLcsdata: 7474 case FLfardata: 7475 case FLtlsdata: 7476 case FLfunc: 7477 case FLpseudo: 7478 case FLextern: 7479 assert(flinsymtab[fl]); 7480 symbol_hydrate(&c.IEV2.Vsym); 7481 symbol_debug(c.IEV2.Vsym); 7482 break; 7483 7484 case FLdatseg: 7485 case FLfltreg: 7486 case FLallocatmp: 7487 case FLcs: 7488 case FLndp: 7489 case FLoffset: 7490 case FLlocalsize: 7491 case FLconst: 7492 case FLframehandler: 7493 assert(!flinsymtab[fl]); 7494 break; 7495 7496 case FLcode: 7497 ph_hydrate(cast(void**)&c.IEV2.Vcode); 7498 break; 7499 7500 case FLblock: 7501 case FLblockoff: 7502 ph_hydrate(cast(void**)&c.IEV2.Vblock); 7503 break; 7504 7505 default: 7506 WRFL(fl); 7507 assert(0); 7508 } 7509 done: 7510 { } 7511 7512 pc = &c.next; 7513 } 7514 } 7515 } 7516 7517 /********************************** 7518 */ 7519 7520 static if (DEHYDRATE) 7521 { 7522 void code_dehydrate(code **pc) 7523 { 7524 code *c; 7525 ubyte ins,rm; 7526 FL fl; 7527 7528 while ((c = *pc) != null) 7529 { 7530 ph_dehydrate(pc); 7531 7532 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 7533 ins = vex_inssize(c); 7534 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 7535 ins = inssize2[(c.Iop >> 8) & 0xFF]; 7536 else if ((c.Iop & 0xFF00) == 0x0F00) 7537 ins = inssize2[c.Iop & 0xFF]; 7538 else 7539 ins = inssize[c.Iop & 0xFF]; 7540 switch (c.Iop) 7541 { 7542 default: 7543 break; 7544 7545 case ESCAPE | ESClinnum: 7546 srcpos_dehydrate(&c.IEV1.Vsrcpos); 7547 goto done; 7548 7549 case ESCAPE | ESCctor: 7550 case ESCAPE | ESCdtor: 7551 el_dehydrate(&c.IEV1.Vtor); 7552 goto done; 7553 7554 case ASM: 7555 ph_dehydrate(&c.IEV1.bytes); 7556 goto done; 7557 } 7558 7559 if (!(ins & M) || 7560 ((rm = c.Irm) & 0xC0) == 0xC0) 7561 goto do2; /* if no first operand */ 7562 if (is32bitaddr(I32,c.Iflags)) 7563 { 7564 7565 if ( 7566 ((rm & 0xC0) == 0 && !((rm & 7) == 4 && (c.Isib & 7) == 5 || (rm & 7) == 5)) 7567 ) 7568 goto do2; /* if no first operand */ 7569 } 7570 else 7571 { 7572 if ( 7573 ((rm & 0xC0) == 0 && !((rm & 7) == 6)) 7574 ) 7575 goto do2; /* if no first operand */ 7576 } 7577 fl = cast(FL) c.IFL1; 7578 switch (fl) 7579 { 7580 case FLudata: 7581 case FLdata: 7582 case FLreg: 7583 case FLauto: 7584 case FLfast: 7585 case FLbprel: 7586 case FLpara: 7587 case FLcsdata: 7588 case FLfardata: 7589 case FLtlsdata: 7590 case FLfunc: 7591 case FLpseudo: 7592 case FLextern: 7593 assert(flinsymtab[fl]); 7594 symbol_dehydrate(&c.IEV1.Vsym); 7595 break; 7596 7597 case FLdatseg: 7598 case FLfltreg: 7599 case FLallocatmp: 7600 case FLcs: 7601 case FLndp: 7602 case FLoffset: 7603 case FLlocalsize: 7604 case FLconst: 7605 case FLframehandler: 7606 assert(!flinsymtab[fl]); 7607 break; 7608 7609 case FLcode: 7610 ph_dehydrate(&c.IEV1.Vcode); 7611 break; 7612 7613 case FLblock: 7614 case FLblockoff: 7615 ph_dehydrate(&c.IEV1.Vblock); 7616 break; 7617 version (SCPP) 7618 { 7619 case FLctor: 7620 case FLdtor: 7621 el_dehydrate(&c.IEV1.Vtor); 7622 break; 7623 } 7624 case FLasm: 7625 ph_dehydrate(&c.IEV1.bytes); 7626 break; 7627 7628 default: 7629 WRFL(fl); 7630 assert(0); 7631 break; 7632 } 7633 do2: 7634 /* Ignore TEST (F6 and F7) opcodes */ 7635 if (!(ins & T)) 7636 goto done; /* if no second operand */ 7637 7638 fl = cast(FL) c.IFL2; 7639 switch 
(fl) 7640 { 7641 case FLudata: 7642 case FLdata: 7643 case FLreg: 7644 case FLauto: 7645 case FLfast: 7646 case FLbprel: 7647 case FLpara: 7648 case FLcsdata: 7649 case FLfardata: 7650 case FLtlsdata: 7651 case FLfunc: 7652 case FLpseudo: 7653 case FLextern: 7654 assert(flinsymtab[fl]); 7655 symbol_dehydrate(&c.IEV2.Vsym); 7656 break; 7657 7658 case FLdatseg: 7659 case FLfltreg: 7660 case FLallocatmp: 7661 case FLcs: 7662 case FLndp: 7663 case FLoffset: 7664 case FLlocalsize: 7665 case FLconst: 7666 case FLframehandler: 7667 assert(!flinsymtab[fl]); 7668 break; 7669 7670 case FLcode: 7671 ph_dehydrate(&c.IEV2.Vcode); 7672 break; 7673 7674 case FLblock: 7675 case FLblockoff: 7676 ph_dehydrate(&c.IEV2.Vblock); 7677 break; 7678 7679 default: 7680 WRFL(fl); 7681 assert(0); 7682 break; 7683 } 7684 done: 7685 pc = &code_next(c); 7686 } 7687 } 7688 } 7689 } 7690 7691 /*************************** 7692 * Debug code to dump code structure. 7693 */ 7694 7695 void WRcodlst(code *c) 7696 { 7697 for (; c; c = code_next(c)) 7698 code_print(c); 7699 } 7700 7701 extern (C) void code_print(code* c) 7702 { 7703 ubyte ins; 7704 ubyte rexb; 7705 7706 if (c == null) 7707 { 7708 printf("code 0\n"); 7709 return; 7710 } 7711 7712 const op = c.Iop; 7713 if (c.Iflags & CFvex && c.Ivex.pfx == 0xC4) 7714 ins = vex_inssize(c); 7715 else if ((c.Iop & 0xFFFD00) == 0x0F3800) 7716 ins = inssize2[(op >> 8) & 0xFF]; 7717 else if ((c.Iop & 0xFF00) == 0x0F00) 7718 ins = inssize2[op & 0xFF]; 7719 else 7720 ins = inssize[op & 0xFF]; 7721 7722 printf("code %p: nxt=%p ",c,code_next(c)); 7723 7724 if (c.Iflags & CFvex) 7725 { 7726 if (c.Iflags & CFvex3) 7727 { 7728 printf("vex=0xC4"); 7729 printf(" 0x%02X", VEX3_B1(c.Ivex)); 7730 printf(" 0x%02X", VEX3_B2(c.Ivex)); 7731 rexb = 7732 ( c.Ivex.w ? REX_W : 0) | 7733 (!c.Ivex.r ? REX_R : 0) | 7734 (!c.Ivex.x ? REX_X : 0) | 7735 (!c.Ivex.b ? REX_B : 0); 7736 } 7737 else 7738 { 7739 printf("vex=0xC5"); 7740 printf(" 0x%02X", VEX2_B1(c.Ivex)); 7741 rexb = !c.Ivex.r ? 
REX_R : 0; 7742 } 7743 printf(" "); 7744 } 7745 else 7746 rexb = c.Irex; 7747 7748 if (rexb) 7749 { 7750 printf("rex=0x%02X ", c.Irex); 7751 if (rexb & REX_W) 7752 printf("W"); 7753 if (rexb & REX_R) 7754 printf("R"); 7755 if (rexb & REX_X) 7756 printf("X"); 7757 if (rexb & REX_B) 7758 printf("B"); 7759 printf(" "); 7760 } 7761 printf("op=0x%02X",op); 7762 7763 if ((op & 0xFF) == ESCAPE) 7764 { 7765 if ((op & 0xFF00) == ESClinnum) 7766 { 7767 printf(" linnum = %d\n",c.IEV1.Vsrcpos.Slinnum); 7768 return; 7769 } 7770 printf(" ESCAPE %d",c.Iop >> 8); 7771 } 7772 if (c.Iflags) 7773 printf(" flg=%x",c.Iflags); 7774 if (ins & M) 7775 { 7776 uint rm = c.Irm; 7777 printf(" rm=0x%02X=%d,%d,%d",rm,(rm>>6)&3,(rm>>3)&7,rm&7); 7778 if (!I16 && issib(rm)) 7779 { 7780 ubyte sib = c.Isib; 7781 printf(" sib=%02x=%d,%d,%d",sib,(sib>>6)&3,(sib>>3)&7,sib&7); 7782 } 7783 if ((rm & 0xC7) == BPRM || (rm & 0xC0) == 0x80 || (rm & 0xC0) == 0x40) 7784 { 7785 switch (c.IFL1) 7786 { 7787 case FLconst: 7788 case FLoffset: 7789 printf(" int = %4d",c.IEV1.Vuns); 7790 break; 7791 7792 case FLblock: 7793 printf(" block = %p",c.IEV1.Vblock); 7794 break; 7795 7796 case FLswitch: 7797 case FLblockoff: 7798 case FLlocalsize: 7799 case FLframehandler: 7800 case 0: 7801 break; 7802 7803 case FLdatseg: 7804 printf(" %d.%llx",c.IEV1.Vseg,cast(ulong)c.IEV1.Vpointer); 7805 break; 7806 7807 case FLauto: 7808 case FLfast: 7809 case FLreg: 7810 case FLdata: 7811 case FLudata: 7812 case FLpara: 7813 case FLbprel: 7814 case FLtlsdata: 7815 printf(" sym='%s'",c.IEV1.Vsym.Sident.ptr); 7816 break; 7817 7818 case FLextern: 7819 printf(" FLextern offset = %4d",cast(int)c.IEV1.Voffset); 7820 break; 7821 7822 default: 7823 WRFL(cast(FL)c.IFL1); 7824 break; 7825 } 7826 } 7827 } 7828 if (ins & T) 7829 { 7830 printf(" "); 7831 WRFL(cast(FL)c.IFL2); 7832 switch (c.IFL2) 7833 { 7834 case FLconst: 7835 printf(" int = %4d",c.IEV2.Vuns); 7836 break; 7837 7838 case FLblock: 7839 printf(" block = %p",c.IEV2.Vblock); 7840 break; 7841 7842 case FLswitch: 7843 case FLblockoff: 7844 case 0: 7845 case FLlocalsize: 7846 case FLframehandler: 7847 break; 7848 7849 case FLdatseg: 7850 printf(" %d.%llx",c.IEV2.Vseg,cast(ulong)c.IEV2.Vpointer); 7851 break; 7852 7853 case FLauto: 7854 case FLfast: 7855 case FLreg: 7856 case FLpara: 7857 case FLbprel: 7858 case FLfunc: 7859 case FLdata: 7860 case FLudata: 7861 case FLtlsdata: 7862 printf(" sym='%s'",c.IEV2.Vsym.Sident.ptr); 7863 break; 7864 7865 case FLcode: 7866 printf(" code = %p",c.IEV2.Vcode); 7867 break; 7868 7869 default: 7870 WRFL(cast(FL)c.IFL2); 7871 break; 7872 } 7873 } 7874 printf("\n"); 7875 } 7876 7877 }
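
/**********************************************************************
 * Illustrative sketch only (compiled out, not part of the compiler):
 * a minimal model of the x86 ModRM `mod` field rules that calccodsize()
 * and codout() rely on when sizing the displacement that follows the
 * ModRM/SIB bytes in 32/64 bit addressing mode. The names below are
 * hypothetical; the RIP-relative and 16 bit addressing cases are left out.
 */
version (none)
{
    /// Bytes of displacement implied by a ModRM byte (32 bit addressing).
    private uint modrmDispSizeSketch(ubyte rm, ubyte sib)
    {
        switch (rm & 0xC0)
        {
            case 0x40:                  // mod = 01: disp8 follows
                return 1;

            case 0x80:                  // mod = 10: disp32 follows
                return 4;

            case 0:                     // mod = 00: normally no displacement,
                                        // except [disp32] (rm == 101) and a
                                        // SIB byte whose base field is 101
                if ((rm & 7) == 5 || ((rm & 7) == 4 && (sib & 7) == 5))
                    return 4;
                return 0;

            default:                    // mod = 11: register operand, no memory reference
                return 0;
        }
    }

    unittest
    {
        assert(modrmDispSizeSketch(0x45, 0) == 1);  // [EBP + disp8]
        assert(modrmDispSizeSketch(0x85, 0) == 4);  // [EBP + disp32]
        assert(modrmDispSizeSketch(0x05, 0) == 4);  // [disp32] (disp32[RIP] in 64 bit code)
        assert(modrmDispSizeSketch(0x00, 0) == 0);  // [EAX], no displacement
    }
}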