1 /**
2  * Extract symbols from an OMF object file.
3  *
4  * Copyright:   Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanomf.d, _scanomf.d)
8  * Documentation:  https://dlang.org/phobos/dmd_scanomf.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanomf.d
10  */
11 
12 module dmd.scanomf;
13 
14 version(Windows):
15 
16 import core.stdc..string;
17 import core.stdc.stdlib;
18 import dmd.globals;
19 import dmd.root.rmem;
20 import dmd.root.outbuffer;
21 import dmd.root..string;
22 import dmd.arraytypes;
23 import dmd.errors;
24 
25 private enum LOG = false;
26 
/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to (*pAddSymbol)().
 *
 * Records are walked one at a time until a MODEND/M386END record is reached.
 * Names are reported for PUBDEF/PUB386, COMDAT, COMDEF and ALIAS records and
 * for COMENT records that carry an import definition (IMPDEF).
 * `pickAny` is passed as 1 for COMDEF symbols and for COMDATs whose attribute
 * byte has any of its upper four bits set; it is 0 in every other case.
 * A COMENT record holding the Phar Lap EASY-OMF signature switches the
 * interpretation of subsequent PUBDEF and COMDAT records to their 32 bit forms.
 *
 * An error is reported through `error(loc, ...)` and scanning stops if a
 * record does not fit inside base[] or a COMDAT refers to an unknown name.
 *
 * Params:
 *      pAddSymbol =  function to pass the names to
 *      base =        array of contents of object module
 *      module_name = name of the object module (only used for logging here)
 *      loc =         location to use for error printing
 */
void scanOmfObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const(ubyte)[] base, const(char)* module_name, Loc loc)
{
    static if (LOG)
    {
        printf("scanOmfObjModule(%s)\n", module_name);
    }

    // Contents of the COMENT record that marks a Phar Lap EASY-OMF module.
    // These are fixed-size arrays (not pointers) so that .length is the
    // number of signature bytes rather than the size of a pointer.
    static immutable ubyte[7] easyOmfSig = [0x80, 0xAA, '8', '0', '3', '8', '6'];
    // Comment class and subtype bytes of an IMPDEF COMENT record; they follow
    // the leading comment byte, which is not examined.
    static immutable ubyte[2] impdefSig = [0xA0, 1];

    int easyomf = 0; // assume not EASY-OMF
    char[LIBIDMAX + 1] name;
    Strings names;
    // Every entry except the placeholder at index 0 was allocated by strdup()
    scope(exit)
        for (size_t u = 1; u < names.dim; u++)
            free(cast(void*)names[u]);
    names.push(null); // don't use index 0

    auto pend = cast(const(ubyte)*)base.ptr + base.length;
    const(ubyte)* pnext;
    for (auto p = cast(const(ubyte)*)base.ptr; 1; p = pnext)
    {
        // Need the record type byte and the two length bytes
        if (pend - p < 3)
        {
            error(loc, "corrupt OMF object module");
            return;
        }
        ubyte recTyp = p[0];
        // Record length is stored little-endian; read it bytewise so no
        // aligned or const-stripping access is needed
        ushort recLen = cast(ushort)(p[1] | (p[2] << 8));
        p += 3;
        pnext = p + recLen;
        if (pnext > pend)
        {
            error(loc, "corrupt OMF object module");
            return;
        }
        if (recLen)
            recLen--; // forget the checksum
        switch (recTyp)
        {
        case LNAMES:
        case LLNAMES:
            // Remember each name; COMDAT records refer to them by index
            while (p + 1 < pnext)
            {
                parseName(&p, name.ptr);
                char* copy = cast(char*)Mem.check(strdup(name.ptr));
                names.push(copy);
            }
            break;
        case PUBDEF:
            if (easyomf)
                recTyp = PUB386; // convert to MS format
            goto case;
        case PUB386:
            // Both indices are always parsed; the extra field is only
            // present when both of them are zero
            if (!(parseIdx(&p) | parseIdx(&p)))
                p += 2; // skip seg, grp, frame
            while (p + 1 < pnext)
            {
                parseName(&p, name.ptr);
                p += (recTyp == PUBDEF) ? 2 : 4; // skip offset
                parseIdx(&p); // skip type index
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
            }
            break;
        case COMDAT:
            if (easyomf)
                recTyp = COMDAT + 1; // convert to MS format
            goto case;
        case COMDAT + 1:
            {
                int pickAny = 0;
                if (*p++ & 5) // if continuation or local comdat
                    break;
                ubyte attr = *p++;
                if (attr & 0xF0) // attr: if multiple instances allowed
                    pickAny = 1;
                p++; // align
                p += 2; // enum data offset
                if (recTyp == COMDAT + 1)
                    p += 2; // enum data offset is 4 bytes in the 32 bit form
                parseIdx(&p); // type index
                if ((attr & 0x0F) == 0) // if explicit allocation
                {
                    parseIdx(&p); // base group
                    parseIdx(&p); // base segment
                }
                uint idx = parseIdx(&p); // public name index
                if (idx == 0 || idx >= names.dim)
                {
                    //debug(printf("[s] name idx=%d, uCntNames=%d\n", idx, uCntNames));
                    error(loc, "corrupt COMDAT");
                    return;
                }
                //printf("[s] name='%s'\n",name);
                const(char)* n = names[idx];
                pAddSymbol(n.toDString(), pickAny);
                break;
            }
        case COMDEF:
            {
                while (p + 1 < pnext)
                {
                    parseName(&p, name.ptr);
                    parseIdx(&p); // type index
                    skipDataType(&p); // data type
                    pAddSymbol(name[0 .. strlen(name.ptr)], 1);
                }
                break;
            }
        case ALIAS:
            // Names come in pairs; only the first of each pair is reported
            while (p + 1 < pnext)
            {
                parseName(&p, name.ptr);
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
                parseName(&p, name.ptr);
            }
            break;
        case MODEND:
        case M386END:
            return;
        case COMENT:
            // Recognize Phar Lap EASY-OMF format.
            // The comparison does not move p, so a mismatch leaves it
            // at the start of the record data for the IMPDEF test below.
            if (recLen == easyOmfSig.length &&
                p[0 .. easyOmfSig.length] == easyOmfSig[])
            {
                easyomf = 1;
                break;
            }
            // Recognize .IMPDEF Import Definition Records
            if (recLen >= 7 &&
                p[1 .. 1 + impdefSig.length] == impdefSig[])
            {
                p += 1 + impdefSig.length; // skip comment byte, class, subtype
                p++; // skip OrdFlag field
                parseName(&p, name.ptr);
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
            }
            break;
        default:
            // ignore
            break;
        }
    }
}
179 
/*************************************************
 * Walk the OMF records in buf[0..buflen] and hand each object module found
 * to pAddObjModule.
 *
 * A module starts at the first THEADR/LHEADR record seen while no module is
 * open and ends with the next MODEND/M386END record. After a module end the
 * scan position is rounded up to the next multiple of pagesize (the rounding
 * uses a bit mask of pagesize - 1).
 *
 * Params:
 *      pAddObjModule = called with the header name, the start of the module
 *                      and its length in bytes
 *      buf =           start of the memory block to scan
 *      buflen =        number of bytes in buf
 *      pagesize =      alignment of the modules within buf
 * Returns:
 *      true for corrupt OMF data
 */
bool scanOmfLib(void delegate(char* name, void* base, size_t length) pAddObjModule, void* buf, size_t buflen, uint pagesize)
{
    const(ubyte)* bufStart = cast(const(ubyte)*)buf;
    const(ubyte)* bufEnd = bufStart + buflen;
    const(ubyte)* moduleStart = null; // non-null while a module is open
    char[LIBIDMAX + 1] name;

    const(ubyte)* rec = bufStart;
    while (rec < bufEnd) // for each OMF record
    {
        // The 3 byte record header must be followed by at least one byte
        if (rec + 3 >= bufEnd)
            return true; // corrupt

        const ubyte recTyp = rec[0];
        const ushort recLen = *cast(const(ushort)*)(rec + 1);
        const(ubyte)* next = rec + 3 + recLen;
        if (next > bufEnd)
            return true; // corrupt

        if (recTyp == LHEADR || recTyp == THEADR)
        {
            // A header inside an already open module is ignored
            if (moduleStart is null)
            {
                moduleStart = rec;
                const(ubyte)* cursor = rec + 3;
                parseName(&cursor, name.ptr);
                // old C compilers emitted a header named "C"
                if (name[0] == 'C' && name[1] == 0)
                    moduleStart = next; // skip past THEADR
            }
        }
        else if (recTyp == MODEND || recTyp == M386END)
        {
            if (moduleStart !is null)
            {
                pAddObjModule(name.ptr, cast(ubyte*)moduleStart, next - moduleStart);
                moduleStart = null;
            }
            // Round up to next page
            uint offset = cast(uint)(next - bufStart);
            offset = (offset + pagesize - 1) & ~cast(uint)(pagesize - 1);
            next = bufStart + offset;
        }
        // every other record type is ignored

        rec = next;
    }
    return moduleStart !is null; // missing MODEND record
}
239 
/*************************************************
 * Compute the number of bytes writeOMFObj() emits for an object module.
 *
 * Params:
 *      base =   start of the object module
 *      length = size of the object module in bytes
 *      name =   0-terminated name used for a generated header record
 * Returns:
 *      length unchanged if the module already starts with a THEADR or LHEADR
 *      record, otherwise length plus the size of a header record for name
 */
uint OMFObjSize(const(void)* base, uint length, const(char)* name)
{
    const ubyte firstRec = *cast(const(ubyte)*)base;
    if (firstRec == THEADR || firstRec == LHEADR)
        return length;

    const size_t nameLen = strlen(name);
    assert(nameLen <= LIBIDMAX);
    // 5 = record type + 2 length bytes + name length byte + checksum
    return cast(uint)(length + (nameLen + 5));
}
251 
/*************************************************
 * Append an object module to buf, preceded by a generated THEADR record
 * when the module does not already start with a THEADR or LHEADR record.
 *
 * Params:
 *      buf =    buffer to append to
 *      base =   start of the object module
 *      length = size of the object module in bytes
 *      name =   0-terminated name stored in the generated header record;
 *               asserted to be at most 0xFF - 2 characters
 */
void writeOMFObj(OutBuffer* buf, const(void)* base, uint length, const(char)* name)
{
    const ubyte firstRec = *cast(const(ubyte)*)base;
    const bool hasHeader = (firstRec == THEADR || firstRec == LHEADR);
    if (!hasHeader)
    {
        const nameLen = strlen(name);
        assert(nameLen <= LIBIDMAX);
        // the record length and the name length are each stored in one byte
        assert(nameLen <= 0xFF - 2);

        ubyte[4 + LIBIDMAX + 1] rec;
        rec[0] = THEADR;
        rec[1] = cast(ubyte)(2 + nameLen); // low byte of the record length
        rec[2] = 0;                        // high byte of the record length
        rec[3] = cast(ubyte)nameLen;
        memcpy(rec.ptr + 4, name, nameLen);

        // Compute and store record checksum: the negated sum of every
        // preceding byte of the record
        const size_t checksumPos = nameLen + 4;
        ubyte sum = 0;
        foreach (b; rec[0 .. checksumPos])
            sum -= b;
        rec[checksumPos] = sum;

        buf.write(rec.ptr[0 .. nameLen + 5]);
    }
    buf.write(base[0 .. length]);
}
280 
281 private: // for the remainder of this module
282 
/**************************
 * Record types:
 * Values of the first byte of an OMF record. Only some of them are
 * matched by the scanners in this module.
 */
enum RHEADR = 0x6E;
enum REGINT = 0x70;
enum REDATA = 0x72;
enum RIDATA = 0x74;
enum OVLDEF = 0x76;
enum ENDREC = 0x78;
enum BLKDEF = 0x7A;
enum BLKEND = 0x7C;
enum DEBSYM = 0x7E;
enum THEADR = 0x80;
enum LHEADR = 0x82;
enum PEDATA = 0x84;
enum PIDATA = 0x86;
enum COMENT = 0x88;
enum MODEND = 0x8A;
enum M386END = 0x8B; /* 32 bit module end record */
enum EXTDEF = 0x8C;
enum TYPDEF = 0x8E;
enum PUBDEF = 0x90;
enum PUB386 = 0x91;
enum LOCSYM = 0x92;
enum LINNUM = 0x94;
enum LNAMES = 0x96;
enum SEGDEF = 0x98;
enum GRPDEF = 0x9A;
enum FIXUPP = 0x9C;
/*#define (none)        0x9E    */
enum LEDATA = 0xA0;
enum LIDATA = 0xA2;
enum LIBHED = 0xA4;
enum LIBNAM = 0xA6;
enum LIBLOC = 0xA8;
enum LIBDIC = 0xAA;
enum COMDEF = 0xB0;
enum LEXTDEF = 0xB4;
enum LPUBDEF = 0xB6;
enum LCOMDEF = 0xB8;
enum CEXTDEF = 0xBC;
enum COMDAT = 0xC2;
enum LINSYM = 0xC4;
enum ALIAS = 0xC6;
enum LLNAMES = 0xCA;
// Maximum name length: the max size that will fit in dictionary.
// Name buffers in this module are declared with LIBIDMAX + 1 chars.
enum LIBIDMAX = (512 - 0x25 - 3 - 4);
329 
/**************************
 * Copy the length-prefixed name at *pp into name as a 0-terminated string
 * and advance *pp past it.
 *
 * A name normally starts with a single length byte. If that byte is 0xFF and
 * the next byte is 0, the length is instead taken from the following two
 * bytes (low byte first); such long names are asserted to be no longer than
 * LIBIDMAX, the max size that will fit in dictionary.
 *
 * Params:
 *      pp =   pointer to the read position; updated to just past the name
 *      name = destination buffer; receives the name plus a terminating 0
 */
extern (C++) void parseName(const(ubyte)** pp, char* name)
{
    const(ubyte)* src = *pp;
    uint count = src[0];
    if (count == 0xFF && src[1] == 0) // if long name
    {
        count = src[2] | (cast(uint)src[3] << 8);
        src += 4;
        assert(count <= LIBIDMAX);
    }
    else
    {
        src += 1;
    }
    memcpy(name, src, count);
    name[count] = 0;
    *pp = src + count;
}
346 
/**************************
 * Read an index field at *pp and advance *pp past it.
 *
 * If the high bit of the first byte is clear the index is that byte.
 * Otherwise the index is two bytes long: the low seven bits of the first
 * byte form the high part and the second byte the low part.
 *
 * Params:
 *      pp = pointer to the read position; advanced by one or two bytes
 * Returns:
 *      the decoded index
 */
ushort parseIdx(const(ubyte)** pp)
{
    const(ubyte)* cur = *pp;
    const ubyte first = *cur++;
    ushort idx;
    if (first & 0x80)
    {
        const ubyte low = *cur++;
        idx = cast(ushort)(((first & 0x7F) << 8) + low);
    }
    else
    {
        idx = first;
    }
    *pp = cur;
    return idx;
}
355 
// Skip one numeric field of a data type of a COMDEF record.
// The leading byte is always consumed; the values 0x81, 0x84 and 0x88
// are followed by 2, 3 and 4 further bytes respectively. Any other
// leading byte is asserted to be at most 0x80 and stands alone.
void skipNumericField(const(ubyte)** pp)
{
    const(ubyte)* cur = *pp;
    const ubyte lead = *cur++;
    switch (lead)
    {
    case 0x81:
        cur += 2;
        break;
    case 0x84:
        cur += 3;
        break;
    case 0x88:
        cur += 4;
        break;
    default:
        assert(lead <= 0x80);
        break;
    }
    *pp = cur;
}
371 
// Skip the data type of a COMDEF record.
// The type byte 0x61 (FAR data) is followed by two numeric fields and
// 0x62 (NEAR data) by one; any other type byte is asserted to lie in
// 1 .. 0x5f (Borland segment indices) and has nothing following it.
void skipDataType(const(ubyte)** pp)
{
    const(ubyte)* cur = *pp;
    const ubyte kind = *cur++;
    switch (kind)
    {
    case 0x61:
        // FAR data
        skipNumericField(&cur);
        skipNumericField(&cur);
        break;
    case 0x62:
        // NEAR data
        skipNumericField(&cur);
        break;
    default:
        assert(1 <= kind && kind <= 0x5f); // Borland segment indices
        break;
    }
    *pp = cur;
}