/**
 * Extract symbols from an OMF object file.
 *
 * Copyright:   Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanomf.d, _scanomf.d)
 * Documentation:  https://dlang.org/phobos/dmd_scanomf.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanomf.d
 */

module dmd.scanomf;

version(Windows):

import core.stdc.string;
import core.stdc.stdlib;
import dmd.globals;
import dmd.root.rmem;
import dmd.root.outbuffer;
import dmd.root.string;
import dmd.arraytypes;
import dmd.errors;

private enum LOG = false;

/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to (*pAddSymbol)().
 *
 * Scanning stops at the first MODEND/M386END record, or after reporting
 * a corrupt COMDAT record.
 *
 * Params:
 *      pAddSymbol =  function to pass the names to
 *      base =        array of contents of object module
 *      module_name = name of the object module (used for error messages)
 *      loc =         location to use for error printing
 */
void scanOmfObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const(ubyte)[] base, const(char)* module_name, Loc loc)
{
    static if (LOG)
    {
        printf("scanOmfObjModule(%s)\n", module_name);
    }
    int easyomf;
    char[LIBIDMAX + 1] name;
    Strings names;
    // The LNAMES entries are strdup'ed copies owned by this function
    scope(exit)
        for (size_t u = 1; u < names.dim; u++)
            free(cast(void*)names[u]);
    names.push(null); // don't use index 0
    easyomf = 0; // assume not EASY-OMF
    auto pend = cast(const(ubyte)*)base.ptr + base.length;
    const(ubyte)* pnext;
    for (auto p = cast(const(ubyte)*)base.ptr; 1; p = pnext)
    {
        assert(p < pend);
        // Record layout: 1 byte type, 2 byte length, then `length` bytes
        // of which the last one is the checksum
        ubyte recTyp = *p++;
        ushort recLen = *cast(const(ushort)*)p;
        p += 2;
        pnext = p + recLen;
        recLen--; // forget the checksum
        switch (recTyp)
        {
        case LNAMES:
        case LLNAMES:
            while (p + 1 < pnext)
            {
                parseName(&p, name.ptr);
                char* copy = cast(char*)Mem.check(strdup(name.ptr));
                names.push(copy);
            }
            break;
        case PUBDEF:
            if (easyomf)
                recTyp = PUB386; // convert to MS format
            goto case;
        case PUB386:
            // Non-short-circuit `|` so that both indices are always consumed
            if (!(parseIdx(&p) | parseIdx(&p)))
                p += 2; // skip seg, grp, frame
            while (p + 1 < pnext)
            {
                parseName(&p, name.ptr);
                p += (recTyp == PUBDEF) ? 2 : 4; // skip offset
                parseIdx(&p); // skip type index
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
            }
            break;
        case COMDAT:
            if (easyomf)
                recTyp = COMDAT + 1; // convert to MS format
            goto case;
        case COMDAT + 1:
            {
                int pickAny = 0;
                if (*p++ & 5) // if continuation or local comdat
                    break;
                ubyte attr = *p++;
                if (attr & 0xF0) // attr: if multiple instances allowed
                    pickAny = 1;
                p++; // align
                p += 2; // enum data offset
                if (recTyp == COMDAT + 1)
                    p += 2; // enum data offset is 2 bytes wider in this form
                parseIdx(&p); // type index
                if ((attr & 0x0F) == 0) // if explicit allocation
                {
                    parseIdx(&p); // base group
                    parseIdx(&p); // base segment
                }
                uint idx = parseIdx(&p); // public name index
                if (idx == 0 || idx >= names.dim)
                {
                    //debug(printf("[s] name idx=%d, uCntNames=%d\n", idx, uCntNames));
                    error(loc, "corrupt COMDAT");
                    return;
                }
                //printf("[s] name='%s'\n",name);
                const(char)* n = names[idx];
                pAddSymbol(n.toDString(), pickAny);
                break;
            }
        case COMDEF:
            {
                while (p + 1 < pnext)
                {
                    parseName(&p, name.ptr);
                    parseIdx(&p); // type index
                    skipDataType(&p); // data type
                    pAddSymbol(name[0 .. strlen(name.ptr)], 1);
                }
                break;
            }
        case ALIAS:
            while (p + 1 < pnext)
            {
                // Each entry is a pair of names; only the first one is reported
                parseName(&p, name.ptr);
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
                parseName(&p, name.ptr);
            }
            break;
        case MODEND:
        case M386END:
            return;
        case COMENT:
            {
                // Recognize Phar Lap EASY-OMF format.
                // The signatures are fixed-size arrays so that .length is the
                // number of signature bytes (a pointer's .sizeof would be the
                // pointer size instead). The comparisons do not advance p, so
                // a failed match leaves p at the start of the record data.
                static immutable ubyte[7] omfstr1 = [0x80, 0xAA, '8', '0', '3', '8', '6'];
                if (recLen == omfstr1.length &&
                    memcmp(p, omfstr1.ptr, omfstr1.length) == 0)
                {
                    easyomf = 1;
                    break;
                }

                // Recognize .IMPDEF Import Definition Records.
                // The first byte of the record data is not compared.
                static immutable ubyte[3] omfstr2 = [0, 0xA0, 1];
                if (recLen >= 7 &&
                    memcmp(p + 1, omfstr2.ptr + 1, omfstr2.length - 1) == 0)
                {
                    p += omfstr2.length; // skip the three leading bytes
                    p++; // skip OrdFlag field
                    parseName(&p, name.ptr);
                    pAddSymbol(name[0 .. strlen(name.ptr)], 0);
                }
                break;
            }
        default:
            // ignore
        }
    }
}

/*************************************************
 * Scan a block of memory buf[0..buflen], pulling out each
 * OMF object module in it and sending the info in it to (*pAddObjModule).
 * Params:
 *      pAddObjModule = called with the name, start and length of each module found
 *      buf =           start of the memory block
 *      buflen =        number of bytes in the memory block
 *      pagesize =      alignment of the modules; the rounding uses a bit mask,
 *                      so it only rounds correctly for a power of 2
 * Returns:
 *      true for corrupt OMF data
 */
bool scanOmfLib(void delegate(char* name, void* base, size_t length) pAddObjModule, void* buf, size_t buflen, uint pagesize)
{
    /* Split up the buffer buf[0..buflen] into multiple object modules,
     * each aligned on a pagesize boundary.
     */
    const(ubyte)* base = null;
    char[LIBIDMAX + 1] name;
    auto p = cast(const(ubyte)*)buf;
    auto pend = p + buflen;
    const(ubyte)* pnext;
    for (; p < pend; p = pnext) // for each OMF record
    {
        if (p + 3 >= pend)
            return true; // corrupt
        ubyte recTyp = *p;
        ushort recLen = *cast(const(ushort)*)(p + 1);
        pnext = p + 3 + recLen;
        if (pnext > pend)
            return true; // corrupt
        recLen--; // forget the checksum
        switch (recTyp)
        {
        case LHEADR:
        case THEADR:
            if (!base)
            {
                base = p;
                p += 3;
                parseName(&p, name.ptr);
                if (name[0] == 'C' && name[1] == 0) // old C compilers did this
                    base = pnext; // skip past THEADR
            }
            break;
        case MODEND:
        case M386END:
            {
                if (base)
                {
                    pAddObjModule(name.ptr, cast(ubyte*)base, pnext - base);
                    base = null;
                }
                // Round up to next page
                uint t = cast(uint)(pnext - cast(const(ubyte)*)buf);
                t = (t + pagesize - 1) & ~cast(uint)(pagesize - 1);
                pnext = cast(const(ubyte)*)buf + t;
                break;
            }
        default:
            // ignore
        }
    }
    return (base !is null); // missing MODEND record
}

/*************************************************
 * Compute the number of bytes writeOMFObj() will write for an object module.
 * Params:
 *      base =   start of the object module
 *      length = number of bytes in the object module
 *      name =   0-terminated module name, at most LIBIDMAX characters
 * Returns:
 *      length, increased by the size of a THEADR record for name
 *      if the module does not start with a THEADR or LHEADR record
 */
uint OMFObjSize(const(void)* base, uint length, const(char)* name)
{
    ubyte c = *cast(const(ubyte)*)base;
    if (c != THEADR && c != LHEADR)
    {
        size_t len = strlen(name);
        assert(len <= LIBIDMAX);
        // type (1) + record length (2) + name length (1) + name + checksum (1)
        length += len + 5;
    }
    return length;
}

/*************************************************
 * Write an object module to buf, preceded by a generated THEADR record
 * for name if the module does not start with a THEADR or LHEADR record.
 * Params:
 *      buf =    buffer to write to
 *      base =   start of the object module
 *      length = number of bytes in the object module
 *      name =   0-terminated module name; must fit in a single length byte
 *               together with the 2 extra bytes counted by the record length
 */
void writeOMFObj(OutBuffer* buf, const(void)* base, uint length, const(char)* name)
{
    ubyte c = *cast(const(ubyte)*)base;
    if (c != THEADR && c != LHEADR)
    {
        const len = strlen(name);
        assert(len <= LIBIDMAX);
        // Check before len is truncated into the single-byte header fields
        assert(len <= 0xFF - 2);
        ubyte[4 + LIBIDMAX + 1] header;
        header[0] = THEADR;
        header[1] = cast(ubyte)(2 + len); // record length, low byte
        header[2] = 0;                    // record length, high byte
        header[3] = cast(ubyte)len;       // name length
        memcpy(4 + header.ptr, name, len);
        // Compute and store record checksum
        uint n = cast(uint)(len + 4);
        ubyte checksum = 0;
        ubyte* p = header.ptr;
        while (n--)
        {
            checksum -= *p;
            p++;
        }
        *p = checksum;
        buf.write(header.ptr[0 .. len + 5]);
    }
    buf.write(base[0 .. length]);
}

private: // for the remainder of this module

/**************************
 * Record types:
 */
enum RHEADR = 0x6E;
enum REGINT = 0x70;
enum REDATA = 0x72;
enum RIDATA = 0x74;
enum OVLDEF = 0x76;
enum ENDREC = 0x78;
enum BLKDEF = 0x7A;
enum BLKEND = 0x7C;
enum DEBSYM = 0x7E;
enum THEADR = 0x80;
enum LHEADR = 0x82;
enum PEDATA = 0x84;
enum PIDATA = 0x86;
enum COMENT = 0x88;
enum MODEND = 0x8A;
enum M386END = 0x8B; /* 32 bit module end record */
enum EXTDEF = 0x8C;
enum TYPDEF = 0x8E;
enum PUBDEF = 0x90;
enum PUB386 = 0x91;
enum LOCSYM = 0x92;
enum LINNUM = 0x94;
enum LNAMES = 0x96;
enum SEGDEF = 0x98;
enum GRPDEF = 0x9A;
enum FIXUPP = 0x9C;
/*#define (none)        0x9E    */
enum LEDATA = 0xA0;
enum LIDATA = 0xA2;
enum LIBHED = 0xA4;
enum LIBNAM = 0xA6;
enum LIBLOC = 0xA8;
enum LIBDIC = 0xAA;
enum COMDEF = 0xB0;
enum LEXTDEF = 0xB4;
enum LPUBDEF = 0xB6;
enum LCOMDEF = 0xB8;
enum CEXTDEF = 0xBC;
enum COMDAT = 0xC2;
enum LINSYM = 0xC4;
enum ALIAS = 0xC6;
enum LLNAMES = 0xCA;

// max size that will fit in dictionary
enum LIBIDMAX = (512 - 0x25 - 3 - 4);

/**************************
 * Read a length-prefixed name and advance the read position past it.
 * A length byte of 0xFF followed by a 0 byte introduces a long name
 * whose length is stored in the next two bytes, low byte first.
 * Params:
 *      pp =   pointer to the read position; updated to point past the name
 *      name = buffer receiving the 0-terminated name; the callers in this
 *             module pass a buffer of LIBIDMAX + 1 characters
 */
extern (C++) void parseName(const(ubyte)** pp, char* name)
{
    auto p = *pp;
    uint len = *p++;
    if (len == 0xFF && *p == 0) // if long name
    {
        len = p[1] & 0xFF;
        len |= cast(uint)p[2] << 8;
        p += 3;
        assert(len <= LIBIDMAX);
    }
    memcpy(name, p, len);
    name[len] = 0;
    *pp = p + len;
}

/**************************
 * Read an index field and advance the read position past it.
 * The field is one byte, or two bytes if the high bit of the first byte is
 * set; in that case the low 7 bits of the first byte are the high part.
 * Params:
 *      pp = pointer to the read position; updated to point past the index
 * Returns:
 *      the index value
 */
ushort parseIdx(const(ubyte)** pp)
{
    auto p = *pp;
    const c = *p++;
    ushort idx = (0x80 & c) ? ((0x7F & c) << 8) + *p++ : c;
    *pp = p;
    return idx;
}

// skip numeric field of a data type of a COMDEF record
void skipNumericField(const(ubyte)** pp)
{
    const(ubyte)* p = *pp;
    const c = *p++;
    // The leading byte either is the value itself (<= 0x80)
    // or selects how many value bytes follow
    if (c == 0x81)
        p += 2;
    else if (c == 0x84)
        p += 3;
    else if (c == 0x88)
        p += 4;
    else
        assert(c <= 0x80);
    *pp = p;
}

// skip data type of a COMDEF record
void skipDataType(const(ubyte)** pp)
{
    auto p = *pp;
    const c = *p++;
    if (c == 0x61)
    {
        // FAR data
        skipNumericField(&p);
        skipNumericField(&p);
    }
    else if (c == 0x62)
    {
        // NEAR data
        skipNumericField(&p);
    }
    else
    {
        assert(1 <= c && c <= 0x5f); // Borland segment indices
    }
    *pp = p;
}