1 /**
2  * Extract symbols from an OMF object file.
3  *
4  * Copyright:   Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanomf.d, _scanomf.d)
8  * Documentation:  https://dlang.org/phobos/dmd_scanomf.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanomf.d
10  */
11 
12 module dmd.scanomf;
13 
14 import core.stdc.string;
15 import core.stdc.stdlib;
16 import dmd.globals;
17 import dmd.root.rmem;
18 import dmd.root.outbuffer;
19 import dmd.root.string;
20 import dmd.arraytypes;
21 import dmd.errors;
22 
23 private enum LOG = false;
24 
/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to pAddSymbol().
 *
 * Records are walked one after another until a MODEND/M386END record is
 * found. Names are reported for PUBDEF/PUB386, COMDAT, COMDEF and ALIAS
 * records and for IMPDEF comment records. A Phar Lap EASY-OMF comment
 * record switches the 16-bit PUBDEF/COMDAT records to their 32-bit layout.
 *
 * Params:
 *      pAddSymbol =  function to pass the names to; `pickAny` is 1 for
 *                    COMDEF symbols and for COMDATs whose attribute byte
 *                    has any of the high four bits set, 0 otherwise
 *      base =        array of contents of object module
 *      module_name = name of the object module (used for error messages)
 *      loc =         location to use for error printing
 */
void scanOmfObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const(ubyte)[] base, const(char)* module_name, Loc loc)
{
    static if (LOG)
    {
        printf("scanOmfObjModule(%s)\n", module_name);
    }
    int easyomf;
    char[LIBIDMAX + 1] name;
    Strings names;
    // Every entry except the placeholder at index 0 is a strdup'd copy
    scope(exit)
        for (size_t u = 1; u < names.dim; u++)
            free(cast(void*)names[u]);
    names.push(null); // don't use index 0
    easyomf = 0; // assume not EASY-OMF
    auto pend = cast(const(ubyte)*)base.ptr + base.length;
    const(ubyte)* pnext;
    // NOTE(review): apart from the assert below, record lengths are not
    // checked against pend here; the input is presumably pre-validated
    // (scanOmfLib performs such checks) - confirm against callers.
    for (auto p = cast(const(ubyte)*)base.ptr; 1; p = pnext)
    {
        assert(p < pend);
        ubyte recTyp = *p++;
        ushort recLen = *cast(const(ushort)*)p;
        p += 2;
        pnext = p + recLen;
        recLen--; // forget the checksum
        switch (recTyp)
        {
        case LNAMES:
        case LLNAMES:
            while (p + 1 < pnext)
            {
                parseName(&p, name.ptr);
                char* copy = cast(char*)Mem.check(strdup(name.ptr));
                names.push(copy);
            }
            break;
        case PUBDEF:
            if (easyomf)
                recTyp = PUB386; // convert to MS format
            goto case;
        case PUB386:
            // Both indices must be parsed, hence `|` rather than `||`
            if (!(parseIdx(&p) | parseIdx(&p)))
                p += 2; // skip seg, grp, frame
            while (p + 1 < pnext)
            {
                parseName(&p, name.ptr);
                p += (recTyp == PUBDEF) ? 2 : 4; // skip offset
                parseIdx(&p); // skip type index
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
            }
            break;
        case COMDAT:
            if (easyomf)
                recTyp = COMDAT + 1; // convert to MS format
            goto case;
        case COMDAT + 1:
            {
                int pickAny = 0;
                if (*p++ & 5) // if continuation or local comdat
                    break;
                ubyte attr = *p++;
                if (attr & 0xF0) // attr: if multiple instances allowed
                    pickAny = 1;
                p++; // align
                p += 2; // enum data offset
                if (recTyp == COMDAT + 1)
                    p += 2; // enum data offset is 4 bytes in the 32-bit form
                parseIdx(&p); // type index
                if ((attr & 0x0F) == 0) // if explicit allocation
                {
                    parseIdx(&p); // base group
                    parseIdx(&p); // base segment
                }
                uint idx = parseIdx(&p); // public name index
                if (idx == 0 || idx >= names.dim)
                {
                    //debug(printf("[s] name idx=%d, uCntNames=%d\n", idx, uCntNames));
                    error(loc, "corrupt COMDAT");
                    return;
                }
                //printf("[s] name='%s'\n",name);
                const(char)* n = names[idx];
                pAddSymbol(n.toDString(), pickAny);
                break;
            }
        case COMDEF:
            {
                while (p + 1 < pnext)
                {
                    parseName(&p, name.ptr);
                    parseIdx(&p); // type index
                    skipDataType(&p); // data type
                    pAddSymbol(name[0 .. strlen(name.ptr)], 1);
                }
                break;
            }
        case ALIAS:
            while (p + 1 < pnext)
            {
                parseName(&p, name.ptr);
                pAddSymbol(name[0 .. strlen(name.ptr)], 0);
                parseName(&p, name.ptr); // skip the substitute name
            }
            break;
        case MODEND:
        case M386END:
            return;
        case COMENT:
            {
                // The signatures are fixed-size arrays so that .length is
                // the number of signature bytes. (Declared as pointers,
                // .sizeof would yield the pointer size instead.)
                // Both checks look at the record from its start without
                // advancing p, so a failed first check cannot skew the second.

                // Recognize Phar Lap EASY-OMF format: 0x80 0xAA "80386"
                static immutable ubyte[7] easyOmfSig =
                    [0x80, 0xAA, 0x38, 0x30, 0x33, 0x38, 0x36];
                if (recLen == easyOmfSig.length &&
                    memcmp(p, easyOmfSig.ptr, easyOmfSig.length) == 0)
                {
                    easyomf = 1;
                    break;
                }

                // Recognize .IMPDEF Import Definition Records.
                // The first byte of the signature is deliberately not compared.
                static immutable ubyte[3] impdefSig = [0, 0xA0, 1];
                if (recLen >= 7 &&
                    p[1] == impdefSig[1] &&
                    p[2] == impdefSig[2])
                {
                    p += impdefSig.length + 1; // skip signature and OrdFlag field
                    parseName(&p, name.ptr);
                    pAddSymbol(name[0 .. strlen(name.ptr)], 0);
                }
                break;
            }
        default:
            // ignore
        }
    }
}
177 
/*************************************************
 * Walk the OMF records in buf[0..buflen] and report every object module
 * found to pAddObjModule.
 *
 * A module starts at an LHEADR/THEADR record and ends with the next
 * MODEND/M386END record; after a module end, scanning resumes at the next
 * multiple of pagesize (measured from buf).
 *
 * Params:
 *      pAddObjModule = called with the module name, a pointer to the first
 *                      byte of the module and the module's length in bytes
 *      buf =           start of the library data
 *      buflen =        number of bytes at buf
 *      pagesize =      page alignment of the modules
 *                      (NOTE(review): the rounding mask assumes a power of two)
 * Returns:
 *      true for corrupt OMF data
 */
bool scanOmfLib(void delegate(char* name, void* base, size_t length) pAddObjModule, void* buf, size_t buflen, uint pagesize)
{
    enum headerSize = 3; // record type byte followed by a 16-bit record length

    const(ubyte)* start = cast(const(ubyte)*)buf;
    const(ubyte)* limit = start + buflen;
    const(ubyte)* moduleStart = null; // non-null while inside a module
    char[LIBIDMAX + 1] moduleName;

    const(ubyte)* rec = start;
    while (rec < limit) // for each OMF record
    {
        if (rec + headerSize >= limit)
            return true; // corrupt: no room for header plus checksum

        const ubyte kind = rec[0];
        const ushort bodyLen = *cast(const(ushort)*)(rec + 1);
        const(ubyte)* following = rec + headerSize + bodyLen;
        if (following > limit)
            return true; // corrupt: record runs past the buffer

        if (kind == LHEADR || kind == THEADR)
        {
            // Only the first header of a module is of interest
            if (moduleStart is null)
            {
                moduleStart = rec;
                const(ubyte)* cursor = rec + headerSize;
                parseName(&cursor, moduleName.ptr);
                if (moduleName[0] == 'C' && moduleName[1] == 0) // old C compilers did this
                    moduleStart = following; // skip past THEADR
            }
        }
        else if (kind == MODEND || kind == M386END)
        {
            if (moduleStart !is null)
            {
                pAddObjModule(moduleName.ptr, cast(ubyte*)moduleStart, following - moduleStart);
                moduleStart = null;
            }
            // Round up to next page
            uint offset = cast(uint)(following - start);
            offset = (offset + pagesize - 1) & ~cast(uint)(pagesize - 1);
            following = start + offset;
        }
        // every other record type is ignored

        rec = following;
    }
    return moduleStart !is null; // missing MODEND record
}
237 
/*************************************************
 * Compute the size of an object module once it has a module header.
 *
 * If the data at base does not start with a THEADR or LHEADR record,
 * room for a header record carrying `name` (strlen(name) + 5 bytes) is
 * added. This matches the byte count written by writeOMFObj for the
 * same arguments.
 *
 * Params:
 *      base =   start of the object module data (its first byte is read)
 *      length = size of the object module data in bytes
 *      name =   0-terminated module name, at most LIBIDMAX characters
 * Returns:
 *      length, plus the header size when a header is missing
 */
uint OMFObjSize(const(void)* base, uint length, const(char)* name)
{
    const ubyte firstRecord = *cast(const(ubyte)*)base;
    if (firstRecord == THEADR || firstRecord == LHEADR)
        return length; // header already present

    const size_t nameLen = strlen(name);
    assert(nameLen <= LIBIDMAX);
    return cast(uint)(length + nameLen + 5);
}
249 
/*************************************************
 * Append an object module to buf, preceded by a THEADR record if the
 * module does not already start with a THEADR or LHEADR record.
 *
 * The generated record is: type byte, 16-bit record length, name length
 * byte, name bytes, checksum byte.
 *
 * Params:
 *      buf =    buffer to append to
 *      base =   start of the object module data
 *      length = size of the object module data in bytes
 *      name =   0-terminated module name used for the generated header
 */
void writeOMFObj(OutBuffer* buf, const(void)* base, uint length, const(char)* name)
{
    const ubyte firstRecord = *cast(const(ubyte)*)base;
    const bool hasHeader = (firstRecord == THEADR || firstRecord == LHEADR);
    if (!hasHeader)
    {
        const nameLen = strlen(name);
        assert(nameLen <= LIBIDMAX);
        ubyte[4 + LIBIDMAX + 1] record;
        record[0] = THEADR;
        record[1] = cast(ubyte)(2 + nameLen); // low byte of record length
        record[2] = 0;                        // high byte of record length
        record[3] = cast(ubyte)nameLen;
        assert(nameLen <= 0xFF - 2); // lengths above must fit in one byte
        memcpy(record.ptr + 4, name, nameLen);

        // Compute and store record checksum: chosen so that all bytes of
        // the record sum to zero modulo 256
        const size_t checksumAt = nameLen + 4;
        ubyte sum = 0;
        foreach (b; record.ptr[0 .. checksumAt])
            sum -= b;
        record.ptr[checksumAt] = sum;

        buf.write(record.ptr[0 .. nameLen + 5]);
    }
    buf.write(base[0 .. length]);
}
278 
private: // for the remainder of this module

/**************************
 * OMF record types.
 * The first byte of every record holds one of these codes.
 */
enum RHEADR = 0x6E;
enum REGINT = 0x70;
enum REDATA = 0x72;
enum RIDATA = 0x74;
enum OVLDEF = 0x76;
enum ENDREC = 0x78;
enum BLKDEF = 0x7A;
enum BLKEND = 0x7C;
enum DEBSYM = 0x7E;
enum THEADR = 0x80;
enum LHEADR = 0x82;
enum PEDATA = 0x84;
enum PIDATA = 0x86;
enum COMENT = 0x88;
enum MODEND = 0x8A;
enum M386END = 0x8B; /* 32 bit module end record */
enum EXTDEF = 0x8C;
enum TYPDEF = 0x8E;
enum PUBDEF = 0x90;
enum PUB386 = 0x91;
enum LOCSYM = 0x92;
enum LINNUM = 0x94;
enum LNAMES = 0x96;
enum SEGDEF = 0x98;
enum GRPDEF = 0x9A;
enum FIXUPP = 0x9C;
/* 0x9E has no name assigned */
enum LEDATA = 0xA0;
enum LIDATA = 0xA2;
enum LIBHED = 0xA4;
enum LIBNAM = 0xA6;
enum LIBLOC = 0xA8;
enum LIBDIC = 0xAA;
enum COMDEF = 0xB0;
enum LEXTDEF = 0xB4;
enum LPUBDEF = 0xB6;
enum LCOMDEF = 0xB8;
enum CEXTDEF = 0xBC;
enum COMDAT = 0xC2;
enum LINSYM = 0xC4;
enum ALIAS = 0xC6;
enum LLNAMES = 0xCA;

// Maximum name length: max size that will fit in dictionary.
// Name buffers in this module are declared as char[LIBIDMAX + 1].
enum LIBIDMAX = (512 - 0x25 - 3 - 4);

/**************************
 * Read a length-prefixed name at *pp into name[] as a 0-terminated string
 * and advance *pp past it.
 *
 * A name normally starts with a one-byte length. If that byte is 0xFF and
 * the next byte is 0, the name is "long": a 16-bit little-endian length
 * follows those two bytes.
 *
 * Params:
 *      pp =   in: pointer to the start of the name; out: pointer just past it
 *      name = destination buffer; must have room for the name plus the
 *             terminating 0 (callers in this module pass char[LIBIDMAX + 1])
 */
extern (C++) void parseName(const(ubyte)** pp, char* name)
{
    const(ubyte)* cursor = *pp;
    uint count = cursor[0];
    cursor++;
    if (count == 0xFF && cursor[0] == 0) // if long name
    {
        count = cursor[1] | (cast(uint)cursor[2] << 8);
        cursor += 3;
        assert(count <= LIBIDMAX);
    }
    memcpy(name, cursor, count);
    name[count] = 0;
    *pp = cursor + count;
}
344 
/**************************
 * Read an index field at *pp and advance *pp past it.
 *
 * An index takes one byte when the high bit of the first byte is clear.
 * Otherwise it takes two bytes: the low 7 bits of the first byte are the
 * high part of the value and the second byte is the low part.
 *
 * Params:
 *      pp = in: pointer to the index; out: pointer just past it
 * Returns:
 *      the decoded index (0 .. 0x7FFF)
 */
ushort parseIdx(const(ubyte)** pp)
{
    const(ubyte)* cursor = *pp;
    const ubyte first = *cursor++;
    ushort result = first;
    if (first & 0x80)
    {
        // two-byte form
        const ubyte low = *cursor++;
        result = cast(ushort)(((first & 0x7F) << 8) | low);
    }
    *pp = cursor;
    return result;
}
353 
/**************************
 * Skip one numeric field of a data type of a COMDEF record.
 *
 * The first byte selects the encoding: 0x81, 0x84 and 0x88 are followed
 * by 2, 3 and 4 more bytes respectively; any other value is expected to
 * be at most 0x80 and has no bytes following it.
 *
 * Params:
 *      pp = in: pointer to the field; out: pointer just past it
 */
void skipNumericField(const(ubyte)** pp)
{
    const(ubyte)* cursor = *pp;
    const ubyte lead = *cursor++;
    switch (lead)
    {
    case 0x81:
        cursor += 2;
        break;
    case 0x84:
        cursor += 3;
        break;
    case 0x88:
        cursor += 4;
        break;
    default:
        assert(lead <= 0x80);
        break;
    }
    *pp = cursor;
}
369 
/**************************
 * Skip the data type of a COMDEF record.
 *
 * The first byte is the type code: 0x61 (FAR data) is followed by two
 * numeric fields, 0x62 (NEAR data) by one. Any other code is expected to
 * be a Borland segment index in 1 .. 0x5F with nothing following it.
 *
 * Params:
 *      pp = in: pointer to the data type; out: pointer just past it
 */
void skipDataType(const(ubyte)** pp)
{
    const(ubyte)* cursor = *pp;
    const ubyte kind = *cursor++;
    switch (kind)
    {
    case 0x61:
        // FAR data
        skipNumericField(&cursor);
        skipNumericField(&cursor);
        break;
    case 0x62:
        // NEAR data
        skipNumericField(&cursor);
        break;
    default:
        assert(1 <= kind && kind <= 0x5f); // Borland segment indices
        break;
    }
    *pp = cursor;
}