/**
 * Extract symbols from an OMF object file.
 *
 * Copyright:   Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
 * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
 * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
 * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanomf.d, _scanomf.d)
 * Documentation:  https://dlang.org/phobos/dmd_scanomf.html
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanomf.d
 */

module dmd.scanomf;

import core.stdc.string;
import core.stdc.stdlib;
import dmd.globals;
import dmd.root.rmem;
import dmd.root.outbuffer;
import dmd.root.string;
import dmd.arraytypes;
import dmd.errors;

private enum LOG = false;

/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to (*pAddSymbol)().
 *
 * Records are walked one after another until a MODEND/M386END record is
 * seen. Names are reported for PUBDEF/PUB386, COMDAT, COMDEF and ALIAS
 * records and for IMPDEF comment records. `pickAny` is passed as 1 for
 * COMDEF symbols and for COMDATs whose attribute byte has any of the high
 * four bits set; it is 0 otherwise.
 *
 * Params:
 *      pAddSymbol =    function to pass the names to
 *      base =          array of contents of object module
 *      module_name =   name of the object module (only used by the LOG trace)
 *      loc =           location to use for error printing
 */
void scanOmfObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const(ubyte)[] base, const(char)* module_name, Loc loc)
{
    static if (LOG)
    {
        printf("scanOmfObjModule(%s)\n", module_name);
    }
    int easyomf;
    char[LIBIDMAX + 1] name;
    Strings names;
    // Every entry except the placeholder at index 0 was strdup'ed below.
    scope(exit)
        for (size_t u = 1; u < names.dim; u++)
            free(cast(void*)names[u]);
    names.push(null); // don't use index 0
    easyomf = 0; // assume not EASY-OMF
    auto pend = cast(const(ubyte)*)base.ptr + base.length;
    const(ubyte)* pnext;
    for (auto p = cast(const(ubyte)*)base.ptr; 1; p = pnext)
    {
        assert(p < pend);
        ubyte recTyp = *p++;
        // The 16-bit record length is stored little-endian. Assemble it from
        // its bytes instead of loading through a casted pointer, so the read
        // depends neither on alignment nor on host byte order.
        ushort recLen = cast(ushort)(p[0] | (p[1] << 8));
        p += 2;
        pnext = p + recLen;
        recLen--; // forget the checksum
        switch (recTyp)
        {
        case LNAMES:
        case LLNAMES:
            // Remember each name; COMDAT records refer to them by index.
            while (p + 1 < pnext)
            {
                parseName(&p, name.ptr);
                char* copy = cast(char*)Mem.check(strdup(name.ptr));
                names.push(copy);
            }
            break;
        case PUBDEF:
            if (easyomf)
                recTyp = PUB386; // convert to MS format
            goto case;
        case PUB386:
            // Non-short-circuit `|` on purpose: both indices must be consumed.
            if (!(parseIdx(&p) | parseIdx(&p)))
                p += 2; // skip seg, grp, frame
            while (p + 1 < pnext)
            {
                parseName(&p, name.ptr);
                p += (recTyp == PUBDEF) ? 2 : 4; // skip offset
                parseIdx(&p); // skip type index
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
            }
            break;
        case COMDAT:
            if (easyomf)
                recTyp = COMDAT + 1; // convert to MS format
            goto case;
        case COMDAT + 1:
            {
                int pickAny = 0;
                if (*p++ & 5) // if continuation or local comdat
                    break;
                ubyte attr = *p++;
                if (attr & 0xF0) // attr: if multiple instances allowed
                    pickAny = 1;
                p++; // align
                p += 2; // enum data offset
                if (recTyp == COMDAT + 1)
                    p += 2; // enum data offset (two more bytes in this form)
                parseIdx(&p); // type index
                if ((attr & 0x0F) == 0) // if explicit allocation
                {
                    parseIdx(&p); // base group
                    parseIdx(&p); // base segment
                }
                uint idx = parseIdx(&p); // public name index
                if (idx == 0 || idx >= names.dim)
                {
                    //debug(printf("[s] name idx=%d, uCntNames=%d\n", idx, uCntNames));
                    error(loc, "corrupt COMDAT");
                    return;
                }
                //printf("[s] name='%s'\n",name);
                const(char)* n = names[idx];
                pAddSymbol(n.toDString(), pickAny);
                break;
            }
        case COMDEF:
            {
                while (p + 1 < pnext)
                {
                    parseName(&p, name.ptr);
                    parseIdx(&p); // type index
                    skipDataType(&p); // data type
                    pAddSymbol(name[0 .. strlen(name.ptr)], 1);
                }
                break;
            }
        case ALIAS:
            while (p + 1 < pnext)
            {
                parseName(&p, name.ptr);
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
                parseName(&p, name.ptr); // skip the second name of the pair
            }
            break;
        case MODEND:
        case M386END:
            return;
        case COMENT:
            {
                // The signatures are fixed-size arrays so that `.length` is
                // the number of signature bytes. (`.sizeof` of a `ubyte*`
                // would be the size of the pointer, not of the data.)

                // Recognize Phar Lap EASY-OMF format
                static immutable ubyte[7] easyOmfSig = [0x80, 0xAA, '8', '0', '3', '8', '6'];
                // Compare without advancing p, so that a mismatch leaves p at
                // the start of the record for the IMPDEF test below.
                if (recLen == easyOmfSig.length &&
                    memcmp(p, easyOmfSig.ptr, easyOmfSig.length) == 0)
                {
                    easyomf = 1;
                    break;
                }

                // Recognize .IMPDEF Import Definition Records
                static immutable ubyte[3] impdefSig = [0, 0xA0, 1];
                // The first byte of the record is not compared; only
                // bytes 1 and 2 must match the signature.
                if (recLen >= 7 &&
                    memcmp(p + 1, impdefSig.ptr + 1, impdefSig.length - 1) == 0)
                {
                    p += impdefSig.length;
                    p++; // skip OrdFlag field
                    parseName(&p, name.ptr);
                    pAddSymbol(name[0 .. strlen(name.ptr)], 0);
                }
                break;
            }
        default:
            // ignore
        }
    }
}

/*************************************************
 * Scan a block of memory buf[0..buflen], pulling out each
 * OMF object module in it and sending the info in it to (*pAddObjModule).
 *
 * A module starts at the first LHEADR/THEADR record seen while no module is
 * open and ends with the following MODEND/M386END record. After each module
 * end the scan position is rounded up to the next multiple of `pagesize`.
 *
 * Params:
 *      pAddObjModule = called once per module with the name parsed from its
 *                      header record, the start of the module and its length
 *      buf =           start of the memory block to scan
 *      buflen =        number of bytes in buf
 *      pagesize =      page size used to align modules; the rounding mask
 *                      below assumes it is a power of 2
 * Returns:
 *      true for corrupt OMF data
 */
bool scanOmfLib(void delegate(char* name, void* base, size_t length) pAddObjModule, void* buf, size_t buflen, uint pagesize)
{
    /* Split up the buffer buf[0..buflen] into multiple object modules,
     * each aligned on a pagesize boundary.
     */
    const(ubyte)* base = null;
    char[LIBIDMAX + 1] name;
    auto p = cast(const(ubyte)*)buf;
    auto pend = p + buflen;
    const(ubyte)* pnext;
    for (; p < pend; p = pnext) // for each OMF record
    {
        if (p + 3 >= pend)
            return true; // corrupt
        ubyte recTyp = *p;
        // Little-endian 16-bit record length, assembled bytewise so the read
        // depends neither on alignment nor on host byte order.
        const ushort recLen = cast(ushort)(p[1] | (p[2] << 8));
        pnext = p + 3 + recLen;
        if (pnext > pend)
            return true; // corrupt
        switch (recTyp)
        {
        case LHEADR:
        case THEADR:
            if (!base)
            {
                base = p;
                p += 3;
                parseName(&p, name.ptr);
                if (name[0] == 'C' && name[1] == 0) // old C compilers did this
                    base = pnext; // skip past THEADR
            }
            break;
        case MODEND:
        case M386END:
            {
                if (base)
                {
                    pAddObjModule(name.ptr, cast(ubyte*)base, pnext - base);
                    base = null;
                }
                // Round up to next page
                uint t = cast(uint)(pnext - cast(const(ubyte)*)buf);
                t = (t + pagesize - 1) & ~cast(uint)(pagesize - 1);
                pnext = cast(const(ubyte)*)buf + t;
                break;
            }
        default:
            // ignore
        }
    }
    return (base !is null); // missing MODEND record
}

/*************************************************
 * Compute the number of bytes writeOMFObj() emits for an object module.
 *
 * Params:
 *      base =      start of the object module
 *      length =    length of the object module in bytes
 *      name =      0-terminated module name, at most LIBIDMAX characters
 * Returns:
 *      `length` if the module already starts with a THEADR or LHEADR record,
 *      otherwise `length` plus the size of the THEADR record that
 *      writeOMFObj() prepends (name length + 5)
 */
uint OMFObjSize(const(void)* base, uint length, const(char)* name)
{
    ubyte c = *cast(const(ubyte)*)base;
    if (c != THEADR && c != LHEADR)
    {
        size_t len = strlen(name);
        assert(len <= LIBIDMAX);
        length += len + 5;
    }
    return length;
}

/*************************************************
 * Write an object module to buf, prepending a THEADR record built from
 * `name` if the module does not already start with a THEADR or LHEADR record.
 *
 * Params:
 *      buf =       buffer to append to
 *      base =      start of the object module
 *      length =    length of the object module in bytes
 *      name =      0-terminated module name; it must fit the one-byte
 *                  length fields of the record (at most 0xFF - 2 characters)
 */
void writeOMFObj(OutBuffer* buf, const(void)* base, uint length, const(char)* name)
{
    ubyte c = *cast(const(ubyte)*)base;
    if (c != THEADR && c != LHEADR)
    {
        const len = strlen(name);
        assert(len <= LIBIDMAX);
        // Checked before the narrowing casts below, which would otherwise
        // silently truncate the length fields.
        assert(len <= 0xFF - 2);
        ubyte[4 + LIBIDMAX + 1] header;
        header[0] = THEADR;
        header[1] = cast(ubyte)(2 + len); // record length: name length byte + name + checksum
        header[2] = 0;
        header[3] = cast(ubyte)len;
        memcpy(4 + header.ptr, name, len);
        // Compute and store record checksum: the byte that makes the sum of
        // all record bytes, modulo 256, equal to zero.
        uint n = cast(uint)(len + 4);
        ubyte checksum = 0;
        ubyte* p = header.ptr;
        while (n--)
        {
            checksum -= *p;
            p++;
        }
        *p = checksum;
        buf.write(header.ptr[0 .. len + 5]);
    }
    buf.write(base[0 .. length]);
}

private: // for the remainder of this module

/**************************
 * Record types:
 */
enum RHEADR = 0x6E;
enum REGINT = 0x70;
enum REDATA = 0x72;
enum RIDATA = 0x74;
enum OVLDEF = 0x76;
enum ENDREC = 0x78;
enum BLKDEF = 0x7A;
enum BLKEND = 0x7C;
enum DEBSYM = 0x7E;
enum THEADR = 0x80;
enum LHEADR = 0x82;
enum PEDATA = 0x84;
enum PIDATA = 0x86;
enum COMENT = 0x88;
enum MODEND = 0x8A;
enum M386END = 0x8B; /* 32 bit module end record */
enum EXTDEF = 0x8C;
enum TYPDEF = 0x8E;
enum PUBDEF = 0x90;
enum PUB386 = 0x91;
enum LOCSYM = 0x92;
enum LINNUM = 0x94;
enum LNAMES = 0x96;
enum SEGDEF = 0x98;
enum GRPDEF = 0x9A;
enum FIXUPP = 0x9C;
/*#define (none)        0x9E    */
enum LEDATA = 0xA0;
enum LIDATA = 0xA2;
enum LIBHED = 0xA4;
enum LIBNAM = 0xA6;
enum LIBLOC = 0xA8;
enum LIBDIC = 0xAA;
enum COMDEF = 0xB0;
enum LEXTDEF = 0xB4;
enum LPUBDEF = 0xB6;
enum LCOMDEF = 0xB8;
enum CEXTDEF = 0xBC;
enum COMDAT = 0xC2;
enum LINSYM = 0xC4;
enum ALIAS = 0xC6;
enum LLNAMES = 0xCA;

// max size that will fit in dictionary
enum LIBIDMAX = (512 - 0x25 - 3 - 4);

/**************************
 * Read a length-prefixed name and advance past it.
 *
 * The name is normally preceded by a single length byte. If that byte is
 * 0xFF and the next byte is 0, the length is instead taken from the
 * following two bytes (low byte first) and must not exceed LIBIDMAX.
 *
 * Params:
 *      pp =    in: points at the name's length byte; out: points just past the name
 *      name =  receives the 0-terminated name; must have room for the
 *              name plus the terminator
 */
extern (C++) void parseName(const(ubyte)** pp, char* name)
{
    auto p = *pp;
    uint len = *p++;
    if (len == 0xFF && *p == 0) // if long name
    {
        len = p[1] & 0xFF;
        len |= cast(uint)p[2] << 8;
        p += 3;
        assert(len <= LIBIDMAX);
    }
    memcpy(name, p, len);
    name[len] = 0;
    *pp = p + len;
}

/**************************
 * Read an index field and advance past it.
 *
 * If the high bit of the first byte is set, the index occupies two bytes:
 * the low 7 bits of the first byte are the high part and the second byte is
 * the low part. Otherwise the first byte is the index.
 *
 * Params:
 *      pp =    in: points at the index; out: points just past it
 * Returns:
 *      the decoded index
 */
ushort parseIdx(const(ubyte)** pp)
{
    auto p = *pp;
    const c = *p++;
    ushort idx = (0x80 & c) ? ((0x7F & c) << 8) + *p++ : c;
    *pp = p;
    return idx;
}

/**************************
 * Skip numeric field of a data type of a COMDEF record.
 *
 * A leading byte of 0x81, 0x84 or 0x88 is followed by 2, 3 or 4 more bytes
 * respectively; any other leading byte must be <= 0x80 and stands alone.
 *
 * Params:
 *      pp =    in: points at the numeric field; out: points just past it
 */
void skipNumericField(const(ubyte)** pp)
{
    const(ubyte)* p = *pp;
    const c = *p++;
    if (c == 0x81)
        p += 2;
    else if (c == 0x84)
        p += 3;
    else if (c == 0x88)
        p += 4;
    else
        assert(c <= 0x80);
    *pp = p;
}

/**************************
 * Skip data type of a COMDEF record.
 *
 * Params:
 *      pp =    in: points at the data type byte; out: points just past the
 *              data type and its numeric fields
 */
void skipDataType(const(ubyte)** pp)
{
    auto p = *pp;
    const c = *p++;
    if (c == 0x61)
    {
        // FAR data: two numeric fields follow
        skipNumericField(&p);
        skipNumericField(&p);
    }
    else if (c == 0x62)
    {
        // NEAR data: one numeric field follows
        skipNumericField(&p);
    }
    else
    {
        assert(1 <= c && c <= 0x5f); // Borland segment indices
    }
    *pp = p;
}