1 /**
2  * A library in the OMF format, a legacy format for 32-bit Windows.
3  *
4  * Copyright:   Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libomf.d, _libomf.d)
8  * Documentation:  https://dlang.org/phobos/dmd_libomf.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libomf.d
10  */
11 
12 module dmd.libomf;
13 
14 import core.stdc.stdio;
15 import core.stdc.string;
16 import core.stdc.stdlib;
17 import core.bitop;
18 
19 import dmd.globals;
20 import dmd.utils;
21 import dmd.lib;
22 
23 import dmd.root.array;
24 import dmd.root.file;
25 import dmd.root.filename;
26 import dmd.root.rmem;
27 import dmd.root.outbuffer;
28 import dmd.root.string;
29 import dmd.root.stringtable;
30 
31 import dmd.scanomf;
32 
/**
 * Entry point (only public symbol in this module).
 *
 * Returns:
 *      a newly allocated `LibOMF`, typed as its `Library` base class
 */
extern (C++) Library LibOMF_factory()
{
    Library omfLibrary = new LibOMF();
    return omfLibrary;
}
38 
39 private: // for the remainder of this module
40 
/// Compile-time switch for the `static if (LOG)` printf tracing in this module.
enum bool LOG = false;
42 
/// A dictionary symbol paired with the object module it was found in.
struct OmfObjSymbol
{
    char* name;         /// symbol name, a 0-terminated C string
    OmfObjModule* om;   /// module recorded for this symbol

    /**
     * Predicate for `Array.sort` for name comparison.
     *
     * Params:
     *      ppe1 = pointer to the first array element
     *      ppe2 = pointer to the second array element
     * Returns:
     *      the `strcmp` result for the two symbol names
     */
    static int name_pred (scope const OmfObjSymbol** ppe1, scope const OmfObjSymbol** ppe2) nothrow @nogc pure
    {
        const lhs = *ppe1;
        const rhs = *ppe2;
        return strcmp(lhs.name, rhs.name);
    }
}

/// Growable array of object module pointers.
alias OmfObjModules = Array!(OmfObjModule*);
/// Growable array of symbol pointers.
alias OmfObjSymbols = Array!(OmfObjSymbol*);
57 
58 final class LibOMF : Library
59 {
60     OmfObjModules objmodules; // OmfObjModule[]
61     OmfObjSymbols objsymbols; // OmfObjSymbol[]
62     StringTable!(OmfObjSymbol*) tab;
63 
64     extern (D) this()
65     {
66         tab._init(14_000);
67     }
68 
69     /***************************************
70      * Add object module or library to the library.
71      * Examine the buffer to see which it is.
72      * If the buffer is NULL, use module_name as the file name
73      * and load the file.
74      */
75     override void addObject(const(char)[] module_name, const ubyte[] buffer)
76     {
77         static if (LOG)
78         {
79             printf("LibOMF::addObject(%.*s)\n", cast(int)module_name.length,
80                    module_name.ptr);
81         }
82 
83         void corrupt(int reason)
84         {
85             error("corrupt OMF object module %.*s %d",
86                   cast(int)module_name.length, module_name.ptr, reason);
87         }
88 
89         auto buf = buffer.ptr;
90         auto buflen = buffer.length;
91         if (!buf)
92         {
93             assert(module_name.length, "No module nor buffer provided to `addObject`");
94             // read file and take buffer ownership
95             auto data = readFile(Loc.initial, module_name).extractSlice();
96             buf = data.ptr;
97             buflen = data.length;
98         }
99         uint g_page_size;
100         ubyte* pstart = cast(ubyte*)buf;
101         bool islibrary = false;
102         /* See if it's an OMF library.
103          * Don't go by file extension.
104          */
105         struct LibHeader
106         {
107         align(1):
108             ubyte recTyp; // 0xF0
109             ushort pagesize;
110             uint lSymSeek;
111             ushort ndicpages;
112         }
113 
114         /* Determine if it is an OMF library, an OMF object module,
115          * or something else.
116          */
117         if (buflen < (LibHeader).sizeof)
118             return corrupt(__LINE__);
119         const lh = cast(const(LibHeader)*)buf;
120         if (lh.recTyp == 0xF0)
121         {
122             /* OMF library
123              * The modules are all at buf[g_page_size .. lh.lSymSeek]
124              */
125             islibrary = 1;
126             g_page_size = lh.pagesize + 3;
127             buf = cast(ubyte*)(pstart + g_page_size);
128             if (lh.lSymSeek > buflen || g_page_size > buflen)
129                 return corrupt(__LINE__);
130             buflen = lh.lSymSeek - g_page_size;
131         }
132         else if (lh.recTyp == '!' && memcmp(lh, "!<arch>\n".ptr, 8) == 0)
133         {
134             error("COFF libraries not supported");
135             return;
136         }
137         else
138         {
139             // Not a library, assume OMF object module
140             g_page_size = 16;
141         }
142         bool firstmodule = true;
143 
144         void addOmfObjModule(char* name, void* base, size_t length)
145         {
146             auto om = new OmfObjModule();
147             om.base = cast(ubyte*)base;
148             om.page = cast(ushort)((om.base - pstart) / g_page_size);
149             om.length = cast(uint)length;
150             /* Determine the name of the module
151              */
152             if (firstmodule && module_name && !islibrary)
153             {
154                 // Remove path and extension
155                 om.name = FileName.removeExt(FileName.name(module_name));
156             }
157             else
158             {
159                 /* Use THEADR name as module name,
160                  * removing path and extension.
161                  */
162                 om.name = FileName.removeExt(FileName.name(name.toDString()));
163             }
164             firstmodule = false;
165             this.objmodules.push(om);
166         }
167 
168         if (scanOmfLib(&addOmfObjModule, cast(void*)buf, buflen, g_page_size))
169             return corrupt(__LINE__);
170     }
171 
172     /*****************************************************************************/
173 
174     void addSymbol(OmfObjModule* om, const(char)[] name, int pickAny = 0)
175     {
176         assert(name.length == strlen(name.ptr));
177         static if (LOG)
178         {
179             printf("LibOMF::addSymbol(%.*s, %.*s, %d)\n",
180                 cast(int)om.name.length, om.name.ptr,
181                 cast(int)name.length, name.ptr, pickAny);
182         }
183         if (auto s = tab.insert(name, null))
184         {
185             auto os = new OmfObjSymbol();
186             os.name = cast(char*)Mem.check(strdup(name.ptr));
187             os.om = om;
188             s.value = os;
189             objsymbols.push(os);
190         }
191         else
192         {
193             // already in table
194             if (!pickAny)
195             {
196                 const s2 = tab.lookup(name);
197                 assert(s2);
198                 const os = s2.value;
199                 error("multiple definition of %.*s: %.*s and %.*s: %s",
200                     cast(int)om.name.length, om.name.ptr,
201                     cast(int)name.length, name.ptr,
202                     cast(int)os.om.name.length, os.om.name.ptr, os.name);
203             }
204         }
205     }
206 
207 private:
208     /************************************
209      * Scan single object module for dictionary symbols.
210      * Send those symbols to LibOMF::addSymbol().
211      */
212     void scanObjModule(OmfObjModule* om)
213     {
214         static if (LOG)
215         {
216             printf("LibMSCoff::scanObjModule(%s)\n", om.name.ptr);
217         }
218 
219         extern (D) void addSymbol(const(char)[] name, int pickAny)
220         {
221             this.addSymbol(om, name, pickAny);
222         }
223 
224         scanOmfObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc);
225     }
226 
227     /***********************************
228      * Calculates number of pages needed for dictionary
229      * Returns:
230      *      number of pages
231      */
232     ushort numDictPages(uint padding)
233     {
234         ushort ndicpages;
235         ushort bucksForHash;
236         ushort bucksForSize;
237         uint symSize = 0;
238         foreach (s; objsymbols)
239         {
240             symSize += (strlen(s.name) + 4) & ~1;
241         }
242         foreach (om; objmodules)
243         {
244             size_t len = om.name.length;
245             if (len > 0xFF)
246                 len += 2; // Digital Mars long name extension
247             symSize += (len + 4 + 1) & ~1;
248         }
249         bucksForHash = cast(ushort)((objsymbols.dim + objmodules.dim + HASHMOD - 3) / (HASHMOD - 2));
250         bucksForSize = cast(ushort)((symSize + BUCKETSIZE - padding - padding - 1) / (BUCKETSIZE - padding));
251         ndicpages = (bucksForHash > bucksForSize) ? bucksForHash : bucksForSize;
252         //printf("ndicpages = %u\n",ndicpages);
253         // Find prime number greater than ndicpages
254         __gshared uint* primes =
255         [
256             1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43,
257             47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103,
258             107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
259             163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
260             223, 227, 229, 233, 239, 241, 251, 257, 263, 269,
261             271, 277, 281, 283, 293, 307, 311, 313, 317, 331,
262             337, 347, 349, 353, 359, 367, 373, 379, 383, 389,
263             397, 401, 409, 419, 421, 431, 433, 439, 443, 449,
264             457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
265             //521,523,541,547,
266             0
267         ];
268         for (size_t i = 0; 1; i++)
269         {
270             if (primes[i] == 0)
271             {
272                 // Quick and easy way is out.
273                 // Now try and find first prime number > ndicpages
274                 uint prime;
275                 for (prime = (ndicpages + 1) | 1; 1; prime += 2)
276                 {
277                     // Determine if prime is prime
278                     for (uint u = 3; u < prime / 2; u += 2)
279                     {
280                         if ((prime / u) * u == prime)
281                             goto L1;
282                     }
283                     break;
284                 L1:
285                 }
286                 ndicpages = cast(ushort)prime;
287                 break;
288             }
289             if (primes[i] > ndicpages)
290             {
291                 ndicpages = cast(ushort)primes[i];
292                 break;
293             }
294         }
295         return ndicpages;
296     }
297 
298     /*******************************************
299      * Write the module and symbol names to the dictionary.
300      * Returns:
301      *      false   failure
302      */
303     bool FillDict(ubyte* bucketsP, ushort ndicpages)
304     {
305         // max size that will fit in dictionary
306         enum LIBIDMAX = (512 - 0x25 - 3 - 4);
307         ubyte[4 + LIBIDMAX + 2 + 1] entry;
308         //printf("FillDict()\n");
309         // Add each of the module names
310         foreach (om; objmodules)
311         {
312             ushort n = cast(ushort)om.name.length;
313             if (n > 255)
314             {
315                 entry[0] = 0xFF;
316                 entry[1] = 0;
317                 *cast(ushort*)(entry.ptr + 2) = cast(ushort)(n + 1);
318                 memcpy(entry.ptr + 4, om.name.ptr, n);
319                 n += 3;
320             }
321             else
322             {
323                 entry[0] = cast(ubyte)(1 + n);
324                 memcpy(entry.ptr + 1, om.name.ptr, n);
325             }
326             entry[n + 1] = '!';
327             *(cast(ushort*)(n + 2 + entry.ptr)) = om.page;
328             if (n & 1)
329                 entry[n + 2 + 2] = 0;
330             if (!EnterDict(bucketsP, ndicpages, entry.ptr, n + 1))
331                 return false;
332         }
333         // Sort the symbols
334         objsymbols.sort!(OmfObjSymbol.name_pred);
335         // Add each of the symbols
336         foreach (os; objsymbols)
337         {
338             ushort n = cast(ushort)strlen(os.name);
339             if (n > 255)
340             {
341                 entry[0] = 0xFF;
342                 entry[1] = 0;
343                 *cast(ushort*)(entry.ptr + 2) = n;
344                 memcpy(entry.ptr + 4, os.name, n);
345                 n += 3;
346             }
347             else
348             {
349                 entry[0] = cast(ubyte)n;
350                 memcpy(entry.ptr + 1, os.name, n);
351             }
352             *(cast(ushort*)(n + 1 + entry.ptr)) = os.om.page;
353             if ((n & 1) == 0)
354                 entry[n + 3] = 0;
355             if (!EnterDict(bucketsP, ndicpages, entry.ptr, n))
356             {
357                 return false;
358             }
359         }
360         return true;
361     }
362 
363     /**********************************************
364      * Create and write library to libbuf.
365      * The library consists of:
366      *      library header
367      *      object modules...
368      *      dictionary header
369      *      dictionary pages...
370      */
371     protected override void WriteLibToBuffer(OutBuffer* libbuf)
372     {
373         /* Scan each of the object modules for symbols
374          * to go into the dictionary
375          */
376         foreach (om; objmodules)
377         {
378             scanObjModule(om);
379         }
380         uint g_page_size = 16;
381         /* Calculate page size so that the number of pages
382          * fits in 16 bits. This is because object modules
383          * are indexed by page number, stored as an unsigned short.
384          */
385         while (1)
386         {
387         Lagain:
388             static if (LOG)
389             {
390                 printf("g_page_size = %d\n", g_page_size);
391             }
392             uint offset = g_page_size;
393             foreach (om; objmodules)
394             {
395                 uint page = offset / g_page_size;
396                 if (page > 0xFFFF)
397                 {
398                     // Page size is too small, double it and try again
399                     g_page_size *= 2;
400                     goto Lagain;
401                 }
402                 offset += OMFObjSize(om.base, om.length, om.name.ptr);
403                 // Round the size of the file up to the next page size
404                 // by filling with 0s
405                 uint n = (g_page_size - 1) & offset;
406                 if (n)
407                     offset += g_page_size - n;
408             }
409             break;
410         }
411         /* Leave one page of 0s at start as a dummy library header.
412          * Fill it in later with the real data.
413          */
414         libbuf.fill0(g_page_size);
415         /* Write each object module into the library
416          */
417         foreach (om; objmodules)
418         {
419             uint page = cast(uint)(libbuf.length / g_page_size);
420             assert(page <= 0xFFFF);
421             om.page = cast(ushort)page;
422             // Write out the object module om
423             writeOMFObj(libbuf, om.base, om.length, om.name.ptr);
424             // Round the size of the file up to the next page size
425             // by filling with 0s
426             uint n = (g_page_size - 1) & libbuf.length;
427             if (n)
428                 libbuf.fill0(g_page_size - n);
429         }
430         // File offset of start of dictionary
431         uint offset = cast(uint)libbuf.length;
432         // Write dictionary header, then round it to a BUCKETPAGE boundary
433         ushort size = (BUCKETPAGE - (cast(short)offset + 3)) & (BUCKETPAGE - 1);
434         libbuf.writeByte(0xF1);
435         libbuf.writeword(size);
436         libbuf.fill0(size);
437         // Create dictionary
438         ubyte* bucketsP = null;
439         ushort ndicpages;
440         ushort padding = 32;
441         for (;;)
442         {
443             ndicpages = numDictPages(padding);
444             static if (LOG)
445             {
446                 printf("ndicpages = %d\n", ndicpages);
447             }
448             // Allocate dictionary
449             if (bucketsP)
450                 bucketsP = cast(ubyte*)Mem.check(realloc(bucketsP, ndicpages * BUCKETPAGE));
451             else
452                 bucketsP = cast(ubyte*)Mem.check(malloc(ndicpages * BUCKETPAGE));
453             memset(bucketsP, 0, ndicpages * BUCKETPAGE);
454             for (uint u = 0; u < ndicpages; u++)
455             {
456                 // 'next available' slot
457                 bucketsP[u * BUCKETPAGE + HASHMOD] = (HASHMOD + 1) >> 1;
458             }
459             if (FillDict(bucketsP, ndicpages))
460                 break;
461             padding += 16; // try again with more margins
462         }
463         // Write dictionary
464         libbuf.write(bucketsP[0 .. ndicpages * BUCKETPAGE]);
465         if (bucketsP)
466             free(bucketsP);
467         // Create library header
468         struct Libheader
469         {
470         align(1):
471             ubyte recTyp;
472             ushort recLen;
473             uint trailerPosn;
474             ushort ndicpages;
475             ubyte flags;
476             uint filler;
477         }
478 
479         Libheader libHeader;
480         memset(&libHeader, 0, (Libheader).sizeof);
481         libHeader.recTyp = 0xF0;
482         libHeader.recLen = 0x0D;
483         libHeader.trailerPosn = offset + (3 + size);
484         libHeader.recLen = cast(ushort)(g_page_size - 3);
485         libHeader.ndicpages = ndicpages;
486         libHeader.flags = 1; // always case sensitive
487         // Write library header at start of buffer
488         memcpy(cast(void*)(*libbuf)[].ptr, &libHeader, (libHeader).sizeof);
489     }
490 }
491 
492 /*****************************************************************************/
493 /*****************************************************************************/
/// One object module held in memory, plus where it lands in the output library.
struct OmfObjModule
{
    ubyte* base = null;         /// where are we holding it in memory
    uint length = 0;            /// in bytes
    ushort page = 0;            /// page module starts in output file
    const(char)[] name = null;  /// module name, with terminating 0
}
501 
/// Number of hash slots at the start of a dictionary page; the byte at this
/// offset holds the page's 'next available' position.
enum int HASHMOD = 0x25;
/// Size of one dictionary page in bytes.
enum int BUCKETPAGE = 512;
/// Bytes of a dictionary page left after the hash slots and the
/// 'next available' byte.
enum int BUCKETSIZE = BUCKETPAGE - HASHMOD - 1;
505 
/*******************************************
 * Write a single entry into dictionary.
 *
 * Four 16-bit hashes are computed over the entry (every byte OR-ed with
 * 0x20): a starting page, a page step, a starting slot and a slot step.
 * Pages are then probed in that order; within a page, slots are probed until
 * a free one is found or the probe returns to where it started.
 *
 * Params:
 *      bucketsP = dictionary pages, `ndicpages * BUCKETPAGE` bytes
 *      ndicpages = number of dictionary pages
 *      entry = entry bytes, starting with the length prefix
 *      entrylen = number of bytes hashed; `entry[entrylen]` is the last char
 * Returns:
 *      true if the entry was stored,
 *      false if no probed page could take it
 */
bool EnterDict(ubyte* bucketsP, ushort ndicpages, ubyte* entry, uint entrylen)
{
    ushort uStartPage = 0;
    ushort uPageStep = 0;
    ushort uStartIndex = 0;
    ushort uStep = 0;

    // One pointer walks forward from the prefix, the other backward from
    // the last char in identifier.
    ubyte* fwd = entry;
    ubyte* bwd = entry + entrylen;
    for (uint left = entrylen; left != 0; left--)
    {
        uStartPage  = rol!(ushort)(uStartPage, 2)  ^ (*fwd   | 0x20);
        uStep       = ror!(ushort)(uStep, 2)       ^ (*fwd++ | 0x20);
        uStartIndex = ror!(ushort)(uStartIndex, 2) ^ (*bwd   | 0x20);
        uPageStep   = rol!(ushort)(uPageStep, 2)   ^ (*bwd-- | 0x20);
    }

    // Reduce the hashes to their ranges; a step of 0 would never advance.
    uStartPage %= ndicpages;
    uPageStep %= ndicpages;
    if (uPageStep == 0)
        uPageStep = 1;
    uStartIndex %= HASHMOD;
    uStep %= HASHMOD;
    if (uStep == 0)
        uStep = 1;

    // number of bytes in entry
    uint nbytes = 1 + entrylen + 2;
    if (entrylen > 255)
        nbytes += 2;

    ushort uPage = uStartPage;
    ushort uIndex = uStartIndex;
    do
    {
        ubyte* page = &bucketsP[uPage * BUCKETPAGE];
        uStartIndex = uIndex; // slot probing on this page stops back here
        do
        {
            if (page[uIndex] == 0)
            {
                // n = next available position in this page
                const ushort n = page[HASHMOD] << 1;
                assert(n > HASHMOD);
                if (n + nbytes > BUCKETPAGE)
                {
                    // off end of this page: mark it full, go to next page
                    page[HASHMOD] = 0xFF;
                    break;
                }
                page[uIndex] = cast(ubyte)(n >> 1);
                memcpy(page + n, entry, nbytes);
                page[HASHMOD] += (nbytes + 1) >> 1;
                if (page[HASHMOD] == 0)
                    page[HASHMOD] = 0xFF;
                return true;
            }
            uIndex += uStep;
            uIndex %= HASHMOD;
        } while (uIndex != uStartIndex);

        uPage += uPageStep;
        if (uPage >= ndicpages)
            uPage -= ndicpages;
    } while (uPage != uStartPage);

    return false;
}