1 /**
2  * A library in the OMF format, a legacy format for 32-bit Windows.
3  *
4  * Copyright:   Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libomf.d, _libomf.d)
8  * Documentation:  https://dlang.org/phobos/dmd_libomf.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libomf.d
10  */
11 
12 module dmd.libomf;
13 
14 version(Windows):
15 
16 import core.stdc.stdio;
17 import core.stdc.string;
18 import core.stdc.stdlib;
19 
20 import dmd.globals;
21 import dmd.utils;
22 import dmd.lib;
23 
24 import dmd.root.array;
25 import dmd.root.file;
26 import dmd.root.filename;
27 import dmd.root.rmem;
28 import dmd.root.outbuffer;
29 import dmd.root.string;
30 import dmd.root.stringtable;
31 
32 import dmd.scanomf;
33 
/**
 * Entry point (only public symbol in this module).
 *
 * Returns:
 *      a freshly constructed `LibOMF`, typed as its `Library` base class
 */
extern (C++) Library LibOMF_factory()
{
    Library lib = new LibOMF();
    return lib;
}
39 
40 private: // for the remainder of this module
41 
// Compile-time switch: when set to true, the `static if (LOG)` blocks in this
// module are compiled in and print trace output with printf.
enum LOG = false;
43 
/// A dictionary symbol paired with the object module it was found in.
struct OmfObjSymbol
{
    char* name;         // 0-terminated symbol name
    OmfObjModule* om;   // module the symbol belongs to

    /**
     * Ordering predicate for `Array.sort`: compares two symbols by name.
     * Params:
     *      ppe1 = pointer to the first array element
     *      ppe2 = pointer to the second array element
     * Returns:
     *      the `strcmp` result of the two names
     */
    static int name_pred (scope const OmfObjSymbol** ppe1, scope const OmfObjSymbol** ppe2) nothrow @nogc pure
    {
        const lhs = *ppe1;
        const rhs = *ppe2;
        return strcmp(lhs.name, rhs.name);
    }
}
55 
/// Array of pointers to the object modules collected for the library.
alias OmfObjModules = Array!(OmfObjModule*);
/// Array of pointers to the symbols collected for the library dictionary.
alias OmfObjSymbols = Array!(OmfObjSymbol*);

// 32-bit rotate helpers with C linkage, used by the dictionary hash in EnterDict().
// NOTE(review): presumably supplied by the C runtime the compiler links against - confirm.
extern (C) uint _rotl(uint value, int shift);
extern (C) uint _rotr(uint value, int shift);
61 
/**
 * Builds an OMF library in memory.
 *
 * Object modules (or the modules of existing OMF libraries) are collected with
 * `addObject`. `WriteLibToBuffer` then scans every module for its symbols and
 * emits the library header, the page-aligned modules and the hashed dictionary.
 */
final class LibOMF : Library
{
    OmfObjModules objmodules; // OmfObjModule[]
    OmfObjSymbols objsymbols; // OmfObjSymbol[]
    StringTable!(OmfObjSymbol*) tab; // symbol name -> symbol, used to detect duplicates

    extern (D) this()
    {
        tab._init(14000);
    }

    /***************************************
     * Add object module or library to the library.
     * Examine the buffer to see which it is.
     * If the buffer is NULL, use module_name as the file name
     * and load the file.
     * Params:
     *      module_name = name of the module (and of the file to read when
     *                    `buffer` is null)
     *      buffer = contents of the object module or library, or null
     */
    override void addObject(const(char)[] module_name, const ubyte[] buffer)
    {
        static if (LOG)
        {
            printf("LibOMF::addObject(%.*s)\n", cast(int)module_name.length,
                   module_name.ptr);
        }

        // Report a malformed input; `reason` is the source line that detected it.
        void corrupt(int reason)
        {
            // module_name is a slice, so it has to be printed with an explicit
            // length (%.*s); a plain %s would consume the length as the pointer.
            error("corrupt OMF object module %.*s %d",
                  cast(int)module_name.length, module_name.ptr, reason);
        }

        auto buf = buffer.ptr;
        auto buflen = buffer.length;
        if (!buf)
        {
            assert(module_name.length, "No module nor buffer provided to `addObject`");
            // read file and take buffer ownership
            auto data = readFile(Loc.initial, module_name).extractSlice();
            buf = data.ptr;
            buflen = data.length;
        }
        uint g_page_size;
        ubyte* pstart = cast(ubyte*)buf;
        bool islibrary = false;
        /* See if it's an OMF library.
         * Don't go by file extension.
         */
        struct LibHeader
        {
        align(1):
            ubyte recTyp; // 0xF0
            ushort pagesize;
            uint lSymSeek;
            ushort ndicpages;
        }

        /* Determine if it is an OMF library, an OMF object module,
         * or something else.
         */
        if (buflen < (LibHeader).sizeof)
            return corrupt(__LINE__);
        const lh = cast(const(LibHeader)*)buf;
        if (lh.recTyp == 0xF0)
        {
            /* OMF library
             * The modules are all at buf[g_page_size .. lh.lSymSeek]
             */
            islibrary = true;
            g_page_size = lh.pagesize + 3;
            // Validate before using the header values: both offsets must lie
            // inside the buffer, and the module area must not have a negative
            // size (which would wrap around in the subtraction below).
            if (lh.lSymSeek > buflen || g_page_size > buflen || lh.lSymSeek < g_page_size)
                return corrupt(__LINE__);
            buf = cast(ubyte*)(pstart + g_page_size);
            buflen = lh.lSymSeek - g_page_size;
        }
        else if (lh.recTyp == '!' && memcmp(lh, "!<arch>\n".ptr, 8) == 0)
        {
            error("COFF libraries not supported");
            return;
        }
        else
        {
            // Not a library, assume OMF object module
            g_page_size = 16;
        }
        bool firstmodule = true;

        // Callback for scanOmfLib(): records one object module found in the buffer.
        void addOmfObjModule(char* name, void* base, size_t length)
        {
            auto om = new OmfObjModule();
            om.base = cast(ubyte*)base;
            om.page = cast(ushort)((om.base - pstart) / g_page_size);
            om.length = cast(uint)length;
            /* Determine the name of the module
             */
            if (firstmodule && module_name && !islibrary)
            {
                // Remove path and extension
                om.name = FileName.removeExt(FileName.name(module_name));
            }
            else
            {
                /* Use THEADR name as module name,
                 * removing path and extension.
                 */
                om.name = FileName.removeExt(FileName.name(name.toDString()));
            }
            firstmodule = false;
            this.objmodules.push(om);
        }

        if (scanOmfLib(&addOmfObjModule, cast(void*)buf, buflen, g_page_size))
            return corrupt(__LINE__);
    }

    /*****************************************************************************/

    /**
     * Record a symbol as belonging to module `om`.
     *
     * A symbol seen for the first time is added to `tab` and `objsymbols`.
     * A symbol that is already present is reported as a multiple definition
     * unless `pickAny` is non-zero, in which case it is silently ignored.
     * Params:
     *      om = module the symbol was found in
     *      name = symbol name; must be 0-terminated at `name.length`
     *      pickAny = non-zero to tolerate an already existing definition
     */
    void addSymbol(OmfObjModule* om, const(char)[] name, int pickAny = 0)
    {
        assert(name.length == strlen(name.ptr));
        static if (LOG)
        {
            printf("LibOMF::addSymbol(%.*s, %.*s, %d)\n",
                cast(int)om.name.length, om.name.ptr,
                cast(int)name.length, name.ptr, pickAny);
        }
        if (auto s = tab.insert(name, null))
        {
            auto os = new OmfObjSymbol();
            os.name = cast(char*)Mem.check(strdup(name.ptr));
            os.om = om;
            s.value = os;
            objsymbols.push(os);
        }
        else
        {
            // already in table
            if (!pickAny)
            {
                const s2 = tab.lookup(name);
                assert(s2);
                const os = s2.value;
                error("multiple definition of %.*s: %.*s and %.*s: %s",
                    cast(int)om.name.length, om.name.ptr,
                    cast(int)name.length, name.ptr,
                    cast(int)os.om.name.length, os.om.name.ptr, os.name);
            }
        }
    }

private:
    /************************************
     * Scan single object module for dictionary symbols.
     * Send those symbols to LibOMF::addSymbol().
     * Params:
     *      om = module to scan
     */
    void scanObjModule(OmfObjModule* om)
    {
        static if (LOG)
        {
            printf("LibOMF::scanObjModule(%.*s)\n", cast(int)om.name.length, om.name.ptr);
        }

        // Adapter that binds `om` so the scanner only has to supply name and pickAny.
        extern (D) void addSymbol(const(char)[] name, int pickAny)
        {
            this.addSymbol(om, name, pickAny);
        }

        scanOmfObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc);
    }

    /***********************************
     * Calculates number of pages needed for dictionary
     * Params:
     *      padding = number of bytes per page to leave unused in the size
     *                estimate; the caller raises it when filling fails
     * Returns:
     *      number of pages
     */
    ushort numDictPages(uint padding)
    {
        ushort ndicpages;
        ushort bucksForHash;
        ushort bucksForSize;
        uint symSize = 0;
        // Space taken by the symbol entries, each rounded to an even size
        foreach (s; objsymbols)
        {
            symSize += (strlen(s.name) + 4) & ~1;
        }
        // Space taken by the module name entries (one extra byte each for the
        // '!' that FillDict appends)
        foreach (om; objmodules)
        {
            size_t len = om.name.length;
            if (len > 0xFF)
                len += 2; // Digital Mars long name extension
            symSize += (len + 4 + 1) & ~1;
        }
        // Pages needed so that every entry gets a hash slot ...
        bucksForHash = cast(ushort)((objsymbols.dim + objmodules.dim + HASHMOD - 3) / (HASHMOD - 2));
        // ... and pages needed so that all entry bytes fit
        bucksForSize = cast(ushort)((symSize + BUCKETSIZE - padding - padding - 1) / (BUCKETSIZE - padding));
        ndicpages = (bucksForHash > bucksForSize) ? bucksForHash : bucksForSize;
        //printf("ndicpages = %u\n",ndicpages);
        // Find prime number greater than ndicpages
        __gshared uint* primes =
        [
            1, 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43,
            47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103,
            107, 109, 113, 127, 131, 137, 139, 149, 151, 157,
            163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
            223, 227, 229, 233, 239, 241, 251, 257, 263, 269,
            271, 277, 281, 283, 293, 307, 311, 313, 317, 331,
            337, 347, 349, 353, 359, 367, 373, 379, 383, 389,
            397, 401, 409, 419, 421, 431, 433, 439, 443, 449,
            457, 461, 463, 467, 479, 487, 491, 499, 503, 509,
            //521,523,541,547,
            0
        ];
        for (size_t i = 0; 1; i++)
        {
            if (primes[i] == 0)
            {
                // Quick and easy way is out.
                // Now try and find first prime number > ndicpages
                uint prime;
                for (prime = (ndicpages + 1) | 1; 1; prime += 2)
                {
                    // Determine if prime is prime
                    for (uint u = 3; u < prime / 2; u += 2)
                    {
                        if ((prime / u) * u == prime)
                            goto L1;
                    }
                    break;
                L1:
                }
                ndicpages = cast(ushort)prime;
                break;
            }
            if (primes[i] > ndicpages)
            {
                ndicpages = cast(ushort)primes[i];
                break;
            }
        }
        return ndicpages;
    }

    /*******************************************
     * Write the module and symbol names to the dictionary.
     * Params:
     *      bucketsP = dictionary pages, `ndicpages * BUCKETPAGE` bytes
     *      ndicpages = number of dictionary pages
     * Returns:
     *      false   failure
     */
    bool FillDict(ubyte* bucketsP, ushort ndicpages)
    {
        // max size that will fit in dictionary
        enum LIBIDMAX = (512 - 0x25 - 3 - 4);
        // NOTE(review): names longer than LIBIDMAX are not rejected here and
        // would overrun `entry` - confirm that callers can never produce them.
        ubyte[4 + LIBIDMAX + 2 + 1] entry;
        //printf("FillDict()\n");
        // Add each of the module names
        foreach (om; objmodules)
        {
            ushort n = cast(ushort)om.name.length;
            if (n > 255)
            {
                // long name form: 0xFF, 0, 16-bit length, then the name
                entry[0] = 0xFF;
                entry[1] = 0;
                *cast(ushort*)(entry.ptr + 2) = cast(ushort)(n + 1);
                memcpy(entry.ptr + 4, om.name.ptr, n);
                n += 3;
            }
            else
            {
                // short name form: length byte, then the name
                entry[0] = cast(ubyte)(1 + n);
                memcpy(entry.ptr + 1, om.name.ptr, n);
            }
            // module names are entered with a trailing '!'
            entry[n + 1] = '!';
            *(cast(ushort*)(n + 2 + entry.ptr)) = om.page;
            if (n & 1)
                entry[n + 2 + 2] = 0; // pad byte
            if (!EnterDict(bucketsP, ndicpages, entry.ptr, n + 1))
                return false;
        }
        // Sort the symbols
        objsymbols.sort!(OmfObjSymbol.name_pred);
        // Add each of the symbols
        foreach (os; objsymbols)
        {
            ushort n = cast(ushort)strlen(os.name);
            if (n > 255)
            {
                entry[0] = 0xFF;
                entry[1] = 0;
                *cast(ushort*)(entry.ptr + 2) = n;
                memcpy(entry.ptr + 4, os.name, n);
                n += 3;
            }
            else
            {
                entry[0] = cast(ubyte)n;
                memcpy(entry.ptr + 1, os.name, n);
            }
            *(cast(ushort*)(n + 1 + entry.ptr)) = os.om.page;
            if ((n & 1) == 0)
                entry[n + 3] = 0; // pad byte
            if (!EnterDict(bucketsP, ndicpages, entry.ptr, n))
            {
                return false;
            }
        }
        return true;
    }

    /**********************************************
     * Create and write library to libbuf.
     * The library consists of:
     *      library header
     *      object modules...
     *      dictionary header
     *      dictionary pages...
     * Params:
     *      libbuf = buffer that receives the library image
     */
    protected override void WriteLibToBuffer(OutBuffer* libbuf)
    {
        /* Scan each of the object modules for symbols
         * to go into the dictionary
         */
        foreach (om; objmodules)
        {
            scanObjModule(om);
        }
        uint g_page_size = 16;
        /* Calculate page size so that the number of pages
         * fits in 16 bits. This is because object modules
         * are indexed by page number, stored as an unsigned short.
         */
        while (1)
        {
        Lagain:
            static if (LOG)
            {
                printf("g_page_size = %d\n", g_page_size);
            }
            uint offset = g_page_size;
            foreach (om; objmodules)
            {
                uint page = offset / g_page_size;
                if (page > 0xFFFF)
                {
                    // Page size is too small, double it and try again
                    g_page_size *= 2;
                    goto Lagain;
                }
                offset += OMFObjSize(om.base, om.length, om.name.ptr);
                // Round the size of the file up to the next page size
                // by filling with 0s
                uint n = (g_page_size - 1) & offset;
                if (n)
                    offset += g_page_size - n;
            }
            break;
        }
        /* Leave one page of 0s at start as a dummy library header.
         * Fill it in later with the real data.
         */
        libbuf.fill0(g_page_size);
        /* Write each object module into the library
         */
        foreach (om; objmodules)
        {
            uint page = cast(uint)(libbuf.length / g_page_size);
            assert(page <= 0xFFFF);
            om.page = cast(ushort)page;
            // Write out the object module om
            writeOMFObj(libbuf, om.base, om.length, om.name.ptr);
            // Round the size of the file up to the next page size
            // by filling with 0s
            uint n = (g_page_size - 1) & libbuf.length;
            if (n)
                libbuf.fill0(g_page_size - n);
        }
        // File offset of start of dictionary
        uint offset = cast(uint)libbuf.length;
        // Write dictionary header, then round it to a BUCKETPAGE boundary
        ushort size = (BUCKETPAGE - (cast(short)offset + 3)) & (BUCKETPAGE - 1);
        libbuf.writeByte(0xF1);
        libbuf.writeword(size);
        libbuf.fill0(size);
        // Create dictionary
        ubyte* bucketsP = null;
        ushort ndicpages;
        ushort padding = 32;
        for (;;)
        {
            ndicpages = numDictPages(padding);
            static if (LOG)
            {
                printf("ndicpages = %d\n", ndicpages);
            }
            // Allocate dictionary
            if (bucketsP)
                bucketsP = cast(ubyte*)Mem.check(realloc(bucketsP, ndicpages * BUCKETPAGE));
            else
                bucketsP = cast(ubyte*)Mem.check(malloc(ndicpages * BUCKETPAGE));
            memset(bucketsP, 0, ndicpages * BUCKETPAGE);
            for (uint u = 0; u < ndicpages; u++)
            {
                // 'next available' slot
                bucketsP[u * BUCKETPAGE + HASHMOD] = (HASHMOD + 1) >> 1;
            }
            if (FillDict(bucketsP, ndicpages))
                break;
            padding += 16; // try again with more margins
        }
        // Write dictionary
        libbuf.write(bucketsP[0 .. ndicpages * BUCKETPAGE]);
        if (bucketsP)
            free(bucketsP);
        // Create library header
        struct Libheader
        {
        align(1):
            ubyte recTyp;
            ushort recLen;
            uint trailerPosn;
            ushort ndicpages;
            ubyte flags;
            uint filler;
        }

        Libheader libHeader;
        memset(&libHeader, 0, (Libheader).sizeof);
        libHeader.recTyp = 0xF0;
        // record length is the page size minus the 3 bytes of type and length
        libHeader.recLen = cast(ushort)(g_page_size - 3);
        // dictionary pages start after the 3 byte dictionary header and its padding
        libHeader.trailerPosn = offset + (3 + size);
        libHeader.ndicpages = ndicpages;
        libHeader.flags = 1; // always case sensitive
        // Write library header at start of buffer
        memcpy(cast(void*)(*libbuf)[].ptr, &libHeader, (libHeader).sizeof);
    }
}
495 
496 /*****************************************************************************/
497 /*****************************************************************************/
/// One object module held in memory, together with its placement in the
/// library being read or written.
struct OmfObjModule
{
    ubyte* base; // where are we holding it in memory
    uint length; // in bytes
    ushort page; // page module starts in output file
    const(char)[] name; // module name, with terminating 0
}
505 
// Number of hash slot bytes at the start of each dictionary page; the byte at
// index HASHMOD of a page holds that page's 'next available' position.
enum HASHMOD = 0x25;
// Size in bytes of one dictionary page.
enum BUCKETPAGE = 512;
// Bytes of a page left after the HASHMOD slot bytes and the 'next available' byte.
enum BUCKETSIZE = (BUCKETPAGE - HASHMOD - 1);
509 
/*******************************************
 * Write a single entry into dictionary.
 *
 * Four hash values are derived from the first `entrylen` bytes of the entry
 * (read forwards from `entry[0]` and backwards from `entry[entrylen]`): a
 * starting page, a page stride, a starting slot and a slot stride. Pages and
 * slots are then probed in that order until a free slot is found on a page
 * that still has room for the entry.
 * Params:
 *      bucketsP = dictionary pages, `ndicpages * BUCKETPAGE` bytes
 *      ndicpages = number of dictionary pages
 *      entry = encoded entry: name part followed by the 2 byte page number
 *      entrylen = number of bytes of `entry` that take part in the hash
 * Returns:
 *      false   failure
 */
bool EnterDict(ubyte* bucketsP, ushort ndicpages, ubyte* entry, uint entrylen)
{
    ushort startPage = 0;
    ushort pageStride = 0;
    ushort startSlot = 0;
    ushort slotStride = 0;
    {
        const(ubyte)* head = entry;
        const(ubyte)* tail = entry + entrylen; // point at last char in identifier
        for (uint remaining = entrylen; remaining != 0; --remaining)
        {
            // 0x20 is or'ed into every byte before it enters the hash
            const ubyte fwd = *head++ | 0x20;
            const ubyte bwd = *tail-- | 0x20;
            startPage = cast(ushort)_rotl(startPage, 2) ^ fwd;
            slotStride = cast(ushort)_rotr(slotStride, 2) ^ fwd;
            startSlot = cast(ushort)_rotr(startSlot, 2) ^ bwd;
            pageStride = cast(ushort)_rotl(pageStride, 2) ^ bwd;
        }
    }
    startPage %= ndicpages;
    pageStride %= ndicpages;
    if (pageStride == 0)
        pageStride++;   // a stride of 0 would never leave the start page
    startSlot %= HASHMOD;
    slotStride %= HASHMOD;
    if (slotStride == 0)
        slotStride++;   // likewise for the slot probe

    // number of bytes in entry
    const uint nbytes = entrylen + ((entrylen > 255) ? 5 : 3);

    ushort page = startPage;
    ushort slot = startSlot;
    do
    {
        ubyte* bucket = bucketsP + page * BUCKETPAGE;
        const ushort firstSlot = slot;
        do
        {
            if (bucket[slot] == 0)
            {
                // next available position in this page
                const ushort freeOffset = bucket[HASHMOD] << 1;
                assert(freeOffset > HASHMOD);
                if (freeOffset + nbytes > BUCKETPAGE)
                {
                    // off the end of this page: mark it full, go to next page
                    bucket[HASHMOD] = 0xFF;
                    break;
                }
                bucket[slot] = cast(ubyte)(freeOffset >> 1);
                memcpy(bucket + freeOffset, entry, nbytes);
                bucket[HASHMOD] += (nbytes + 1) >> 1;
                if (bucket[HASHMOD] == 0)
                    bucket[HASHMOD] = 0xFF;
                return true;
            }
            slot += slotStride;
            slot %= HASHMOD;
        } while (slot != firstSlot);
        page += pageStride;
        if (page >= ndicpages)
            page -= ndicpages;
    } while (page != startPage);
    return false;
}