1 /**
2  * A library in the COFF format, used on 32-bit and 64-bit Windows targets.
3  *
4  * Copyright:   Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libmscoff.d, _libmscoff.d)
8  * Documentation:  https://dlang.org/phobos/dmd_libmscoff.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libmscoff.d
10  */
11 
12 module dmd.libmscoff;
13 
14 version(Windows):
15 
16 import core.stdc.stdlib;
17 import core.stdc..string;
18 import core.stdc.time;
19 import core.stdc.stdio;
20 import core.stdc..string;
21 
22 import core.sys.windows.stat;
23 
24 import dmd.globals;
25 import dmd.lib;
26 import dmd.utils;
27 
28 import dmd.root.array;
29 import dmd.root.file;
30 import dmd.root.filename;
31 import dmd.root.outbuffer;
32 import dmd.root.port;
33 import dmd.root.rmem;
34 import dmd.root..string;
35 import dmd.root.stringtable;
36 
37 import dmd.scanmscoff;
38 
/**
 * Entry point (only public symbol in this module).
 *
 * Returns:
 *      a newly allocated `LibMSCoff`, typed as its `Library` base class
 */
public extern (C++) Library LibMSCoff_factory()
{
    auto lib = new LibMSCoff();
    return lib;
}
44 
45 private: // for the remainder of this module
46 
// Compile-time switch: when `true`, the `printf` tracing guarded by
// `static if (LOG)` in this module is compiled in.
enum LOG = false;

// Local name for `struct_stat`; used for the buffer handed to `stat` in
// `LibMSCoff.addObject`.
alias stat_t = struct_stat;
50 
/// A symbol exported by the library, together with the module that defines it.
struct MSCoffObjSymbol
{
    const(char)[] name;         // still has a terminating 0
    MSCoffObjModule* om;        // module the symbol belongs to

    /// Predicate for `Array.sort` for name comparison
    static int name_pred (scope const MSCoffObjSymbol** ppe1, scope const MSCoffObjSymbol** ppe2) nothrow @nogc pure
    {
        return dstrcmp((**ppe1).name, (**ppe2).name);
    }

    /**
     * Predicate for `Array.sort` for offset comparison.
     *
     * Orders symbols by the `offset` of their owning module.
     *
     * Returns:
     *      negative, zero or positive when the first module offset is
     *      less than, equal to or greater than the second
     */
    static int offset_pred (scope const MSCoffObjSymbol** ppe1, scope const MSCoffObjSymbol** ppe2) nothrow @nogc pure
    {
        const uint lhs = (**ppe1).om.offset;
        const uint rhs = (**ppe2).om.offset;
        // Compare rather than subtract: the offsets are unsigned 32-bit
        // values, and their difference reinterpreted as `int` has the wrong
        // sign once they are more than `int.max` apart.
        return (lhs > rhs) - (lhs < rhs);
    }
}
68 
// Array types holding pointers to the modules and symbols collected by `LibMSCoff`.
alias MSCoffObjModules = Array!(MSCoffObjModule*);
alias MSCoffObjSymbols = Array!(MSCoffObjSymbol*);
71 
/**
 * Builder for a library (archive) in the MS-COFF format.
 *
 * Object modules are collected with `addObject`, symbols with `addSymbol`,
 * and `WriteLibToBuffer` serializes everything into the archive layout.
 */
final class LibMSCoff : Library
{
    MSCoffObjModules objmodules; // MSCoffObjModule[]
    MSCoffObjSymbols objsymbols; // MSCoffObjSymbol[]

    /***************************************
     * Add object module or library to the library.
     * Examine the buffer to see which it is.
     * If the buffer is NULL, use module_name as the file name
     * and load the file.
     *
     * A buffer starting with the `!<arch>\n` signature is treated as an
     * existing library: each of its members is appended to `objmodules`
     * (with `scan` cleared) and the symbols listed in its Second Linker
     * Member are added through `addSymbol`.
     * Anything else is treated as a single object module, which is appended
     * with `scan` set so that `WriteLibToBuffer` scans it for symbols.
     *
     * Malformed input is reported through `error`; the number appended to
     * the message is the source line of the check that failed.
     *
     * Params:
     *      module_name = file name of the module or library
     *      buffer = contents of the file, or an empty slice to have the
     *               file `module_name` read from disk
     */
    override void addObject(const(char)[] module_name, const ubyte[] buffer)
    {
        static if (LOG)
        {
            printf("LibMSCoff::addObject(%.*s)\n", cast(int)module_name.length,
                   module_name.ptr);
        }

        void corrupt(int reason)
        {
            error("corrupt MS Coff object module %.*s %d",
                  cast(int)module_name.length, module_name.ptr, reason);
        }

        int fromfile = 0;
        auto buf = buffer.ptr;
        auto buflen = buffer.length;
        if (!buf)
        {
            assert(module_name.length, "No module nor buffer provided to `addObject`");
            // read file and take buffer ownership
            auto data = readFile(Loc.initial, module_name).extractSlice();
            buf = data.ptr;
            buflen = data.length;
            fromfile = 1;
        }
        if (buflen < 16)
        {
            static if (LOG)
            {
                // buflen is a size_t: widen explicitly so the format matches
                printf("buf = %p, buflen = %llu\n", buf, cast(ulong)buflen);
            }
            return corrupt(__LINE__);
        }
        if (memcmp(buf, "!<arch>\n".ptr, 8) == 0)
        {
            /* It's a library file.
             * Pull each object module out of the library and add it
             * to the object module array.
             */
            static if (LOG)
            {
                printf("archive, buf = %p, buflen = %llu\n", buf, cast(ulong)buflen);
            }
            MSCoffLibHeader* flm = null; // first linker member
            MSCoffLibHeader* slm = null; // second linker member
            uint number_of_members = 0;
            uint* member_file_offsets = null;
            uint number_of_symbols = 0;
            ushort* indices = null;
            char* string_table = null;
            size_t string_table_length = 0;
            MSCoffLibHeader* lnm = null; // longname member
            char* longnames = null;
            size_t longnames_length = 0;
            size_t offset = 8;
            size_t mstart = objmodules.dim;
            while (1)
            {
                offset = (offset + 1) & ~1; // round to even boundary
                if (offset >= buflen)
                    break;
                if (offset + MSCoffLibHeader.sizeof >= buflen)
                    return corrupt(__LINE__);
                MSCoffLibHeader* header = cast(MSCoffLibHeader*)(cast(ubyte*)buf + offset);
                offset += MSCoffLibHeader.sizeof;
                char* endptr = null;
                uint size = strtoul(cast(char*)header.file_size, &endptr, 10);
                if (endptr >= header.file_size.ptr + 10 || *endptr != ' ')
                    return corrupt(__LINE__);
                if (offset + size > buflen)
                    return corrupt(__LINE__);
                //printf("header.object_name = '%.*s'\n", cast(int)MSCOFF_OBJECT_NAME_SIZE, header.object_name);
                if (memcmp(cast(char*)header.object_name, cast(char*)"/               ", MSCOFF_OBJECT_NAME_SIZE) == 0)
                {
                    if (!flm)
                    {
                        // First Linker Member, which is ignored
                        flm = header;
                    }
                    else if (!slm)
                    {
                        // Second Linker Member, which we require even though the format doesn't require it
                        slm = header;
                        if (size < 4 + 4)
                            return corrupt(__LINE__);
                        number_of_members = Port.readlongLE(cast(char*)buf + offset);
                        member_file_offsets = cast(uint*)(cast(char*)buf + offset + 4);
                        /* The counts come straight from the file, so the layout
                         * arithmetic is done in 64 bits: a huge count must fail
                         * the size checks instead of wrapping around and
                         * slipping past them.
                         */
                        // end of the member count plus the member offsets array
                        const ulong membersEnd = 4 + cast(ulong)number_of_members * 4;
                        if (size < membersEnd + 4)
                            return corrupt(__LINE__);
                        number_of_symbols = Port.readlongLE(cast(char*)buf + offset + cast(size_t)membersEnd);
                        // end of the symbol count plus the 16-bit indices array
                        const ulong indicesEnd = membersEnd + 4 + cast(ulong)number_of_symbols * 2;
                        if (size <= indicesEnd)
                            return corrupt(__LINE__);
                        // Both ends are now known to lie below `size`, so they fit in size_t
                        indices = cast(ushort*)(cast(char*)buf + offset + cast(size_t)membersEnd + 4);
                        string_table = cast(char*)buf + offset + cast(size_t)indicesEnd;
                        string_table_length = cast(size_t)(size - indicesEnd);
                        /* The number of strings in the string_table must be number_of_symbols; check it
                         * The strings must also be in ascending lexical order; not checked.
                         */
                        size_t i = 0;
                        for (uint n = 0; n < number_of_symbols; n++)
                        {
                            while (1)
                            {
                                if (i >= string_table_length)
                                    return corrupt(__LINE__);
                                if (!string_table[i++])
                                    break;
                            }
                        }
                        if (i != string_table_length)
                            return corrupt(__LINE__);
                    }
                }
                else if (memcmp(cast(char*)header.object_name, cast(char*)"//              ", MSCOFF_OBJECT_NAME_SIZE) == 0)
                {
                    if (!lnm)
                    {
                        lnm = header;
                        longnames = cast(char*)buf + offset;
                        longnames_length = size;
                    }
                }
                else
                {
                    if (!slm)
                        return corrupt(__LINE__);
                    version (none)
                    {
                        // Microsoft Spec says longnames member must appear, but Microsoft Lib says otherwise
                        if (!lnm)
                            return corrupt(__LINE__);
                    }
                    auto om = new MSCoffObjModule();
                    // Include MSCoffLibHeader in base[0..length], so we don't have to repro it
                    om.base = cast(ubyte*)buf + offset - MSCoffLibHeader.sizeof;
                    om.length = cast(uint)(size + MSCoffLibHeader.sizeof);
                    om.offset = 0;
                    if (header.object_name[0] == '/')
                    {
                        /* Pick long name out of longnames[]
                         */
                        uint foff = strtoul(cast(char*)header.object_name + 1, &endptr, 10);
                        uint i;
                        for (i = 0; 1; i++)
                        {
                            // 64-bit sum: `foff` is untrusted and `foff + i`
                            // must not wrap past the bounds check
                            if (cast(ulong)foff + i >= longnames_length)
                                return corrupt(__LINE__);
                            char c = longnames[foff + i];
                            if (c == 0)
                                break;
                        }
                        char* oname = cast(char*)Mem.check(malloc(i + 1));
                        memcpy(oname, longnames + foff, i);
                        oname[i] = 0;
                        om.name = oname[0 .. i];
                        //printf("\tname = '%s'\n", om.name);
                    }
                    else
                    {
                        /* Pick short name out of header
                         * Find the terminating '/' before allocating, so the
                         * corrupt path does not leak the name buffer.
                         */
                        uint i;
                        for (i = 0; 1; i++)
                        {
                            if (i == MSCOFF_OBJECT_NAME_SIZE)
                                return corrupt(__LINE__);
                            if (header.object_name[i] == '/')
                                break;
                        }
                        char* oname = cast(char*)Mem.check(malloc(i + 1));
                        memcpy(oname, header.object_name.ptr, i);
                        oname[i] = 0;
                        om.name = oname[0 .. i];
                    }
                    om.file_time = strtoul(cast(char*)header.file_time, &endptr, 10);
                    om.user_id = strtoul(cast(char*)header.user_id, &endptr, 10);
                    om.group_id = strtoul(cast(char*)header.group_id, &endptr, 10);
                    om.file_mode = strtoul(cast(char*)header.file_mode, &endptr, 8);
                    om.scan = 0; // don't scan object module for symbols
                    objmodules.push(om);
                }
                offset += size;
            }
            if (offset != buflen)
                return corrupt(__LINE__);
            /* Scan the library's symbol table, and insert it into our own.
             * We use this instead of rescanning the object module, because
             * the library's creator may have a different idea of what symbols
             * go into the symbol table than we do.
             * This is also probably faster.
             */
            if (!slm)
                return corrupt(__LINE__);
            char* s = string_table;
            for (uint i = 0; i < number_of_symbols; i++)
            {
                const(char)[] name = s.toDString();
                s += name.length + 1;
                // indices[] is 1-based; an index of 0 wraps around and is rejected below
                uint memi = indices[i] - 1;
                if (memi >= number_of_members)
                    return corrupt(__LINE__);
                uint moff = member_file_offsets[memi];
                // Find the module (among those added from this archive) that starts at moff
                for (size_t m = mstart; 1; m++)
                {
                    if (m == objmodules.dim)
                        return corrupt(__LINE__);       // didn't find it
                    MSCoffObjModule* om = objmodules[m];
                    //printf("\tom offset = x%x\n", (char *)om.base - (char *)buf);
                    if (moff == cast(char*)om.base - cast(char*)buf)
                    {
                        addSymbol(om, name, 1);
                        //if (mstart == m)
                        //    mstart++;
                        break;
                    }
                }
            }
            return;
        }
        /* It's an object module
         */
        auto om = new MSCoffObjModule();
        om.base = cast(ubyte*)buf;
        om.length = cast(uint)buflen;
        om.offset = 0;
        // remove path, but not extension
        om.name = global.params.preservePaths ? module_name : FileName.name(module_name);
        om.scan = 1;
        if (fromfile)
        {
            stat_t statbuf;
            int i = module_name.toCStringThen!(name => stat(name.ptr, &statbuf));
            if (i == -1) // error, errno is set
                return corrupt(__LINE__);
            om.file_time = statbuf.st_ctime;
            om.user_id = statbuf.st_uid;
            om.group_id = statbuf.st_gid;
            om.file_mode = statbuf.st_mode;
        }
        else
        {
            /* Mock things up for the object module file that never was
             * actually written out.
             */
            time_t file_time = 0;
            time(&file_time);
            om.file_time = cast(long)file_time;
            om.user_id = 0; // meaningless on Windows
            om.group_id = 0; // meaningless on Windows
            om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
        }
        objmodules.push(om);
    }

    /*****************************************************************************/

    /************************************
     * Record that module `om` provides symbol `name`.
     *
     * The name is duplicated with `xarraydup` before being stored in
     * `objsymbols`.
     *
     * Params:
     *      om = module that the symbol belongs to
     *      name = symbol name
     *      pickAny = only reported in the LOG trace; not otherwise used here
     */
    void addSymbol(MSCoffObjModule* om, const(char)[] name, int pickAny = 0)
    {
        static if (LOG)
        {
            // `name` is a D slice, so it is printed with an explicit length
            printf("LibMSCoff::addSymbol(%s, %.*s, %d)\n", om.name.ptr,
                   cast(int)name.length, name.ptr, pickAny);
        }
        auto os = new MSCoffObjSymbol();
        os.name = xarraydup(name);
        os.om = om;
        objsymbols.push(os);
    }

private:
    /************************************
     * Scan single object module for dictionary symbols.
     * Send those symbols to LibMSCoff::addSymbol().
     *
     * Params:
     *      om = module whose contents `base[0 .. length]` are scanned
     */
    void scanObjModule(MSCoffObjModule* om)
    {
        static if (LOG)
        {
            printf("LibMSCoff::scanObjModule(%s)\n", om.name.ptr);
        }

        // Adapter binding `om`, so the scanner only has to supply name and pickAny
        extern (D) void addSymbol(const(char)[] name, int pickAny)
        {
            this.addSymbol(om, name, pickAny);
        }

        scanMSCoffObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc);
    }

    /*****************************************************************************/
    /*****************************************************************************/
    /**********************************************
     * Create and write library to libbuf.
     * The library consists of:
     *      !<arch>\n
     *      header
     *      1st Linker Member
     *      Header
     *      2nd Linker Member
     *      Header
     *      Longnames Member
     *      object modules...
     *
     * All member offsets are computed up front and then checked with
     * assertions while writing. Every member starts on an even offset; a
     * '\n' byte is written as padding where needed.
     *
     * Params:
     *      libbuf = buffer that receives the library
     */
    protected override void WriteLibToBuffer(OutBuffer* libbuf)
    {
        static if (LOG)
        {
            printf("LibMSCoff::WriteLibToBuffer()\n");
        }
        assert(MSCoffLibHeader.sizeof == 60);
        /************* Scan Object Modules for Symbols ******************/
        for (size_t i = 0; i < objmodules.dim; i++)
        {
            MSCoffObjModule* om = objmodules[i];
            if (om.scan)
            {
                scanObjModule(om);
            }
        }
        /************* Determine longnames size ******************/
        /* The longnames section is where we store long file names.
         */
        uint noffset = 0;
        for (size_t i = 0; i < objmodules.dim; i++)
        {
            MSCoffObjModule* om = objmodules[i];
            size_t len = om.name.length;
            // Names that do not fit in the header (with their '/' terminator)
            // go to the longnames section
            if (len >= MSCOFF_OBJECT_NAME_SIZE)
            {
                om.name_offset = noffset;
                noffset += len + 1;
            }
            else
                om.name_offset = -1;
        }
        static if (LOG)
        {
            printf("\tnoffset = x%x\n", noffset);
        }
        /************* Determine string table length ******************/
        size_t slength = 0;
        for (size_t i = 0; i < objsymbols.dim; i++)
        {
            MSCoffObjSymbol* os = objsymbols[i];
            slength += os.name.length + 1;
        }
        /************* Offset of first module ***********************/
        size_t moffset = 8; // signature
        size_t firstLinkerMemberOffset = moffset;
        moffset += MSCoffLibHeader.sizeof + 4 + objsymbols.dim * 4 + slength; // 1st Linker Member
        moffset += moffset & 1;
        size_t secondLinkerMemberOffset = moffset;
        moffset += MSCoffLibHeader.sizeof + 4 + objmodules.dim * 4 + 4 + objsymbols.dim * 2 + slength;
        moffset += moffset & 1;
        size_t LongnamesMemberOffset = moffset;
        moffset += MSCoffLibHeader.sizeof + noffset; // Longnames Member size
        static if (LOG)
        {
            // moffset is a size_t: narrow explicitly so the format matches
            printf("\tmoffset = x%x\n", cast(uint)moffset);
        }
        /************* Offset of each module *************************/
        for (size_t i = 0; i < objmodules.dim; i++)
        {
            MSCoffObjModule* om = objmodules[i];
            moffset += moffset & 1;
            om.offset = cast(uint)moffset;
            // Modules taken from an existing library already carry their header
            if (om.scan)
                moffset += MSCoffLibHeader.sizeof + om.length;
            else
                moffset += om.length;
        }
        libbuf.reserve(moffset);
        /************* Write the library ******************/
        libbuf.write("!<arch>\n");
        // Pseudo module used only to fill in the linker member headers
        MSCoffObjModule om;
        om.name_offset = -1;
        om.base = null;
        om.length = cast(uint)(4 + objsymbols.dim * 4 + slength);
        om.offset = 8;
        om.name = "";
        time_t file_time = 0;
        .time(&file_time);
        om.file_time = cast(long)file_time;
        om.user_id = 0;
        om.group_id = 0;
        om.file_mode = 0;
        /*** Write out First Linker Member ***/
        assert(libbuf.length == firstLinkerMemberOffset);
        MSCoffLibHeader h;
        MSCoffOmToHeader(&h, &om);
        libbuf.write((&h)[0 .. 1]);
        char[4] buf;
        // The First Linker Member stores its numbers big-endian
        Port.writelongBE(cast(uint)objsymbols.dim, buf.ptr);
        libbuf.write(buf[0 .. 4]);
        // Sort objsymbols[] in module offset order
        objsymbols.sort!(MSCoffObjSymbol.offset_pred);
        uint lastoffset;
        for (size_t i = 0; i < objsymbols.dim; i++)
        {
            MSCoffObjSymbol* os = objsymbols[i];
            //printf("objsymbols[%d] = '%s', offset = %u\n", i, os.name, os.om.offset);
            if (i)
            {
                // Should be sorted in module order
                assert(lastoffset <= os.om.offset);
            }
            lastoffset = os.om.offset;
            Port.writelongBE(lastoffset, buf.ptr);
            libbuf.write(buf[0 .. 4]);
        }
        for (size_t i = 0; i < objsymbols.dim; i++)
        {
            MSCoffObjSymbol* os = objsymbols[i];
            libbuf.writestring(os.name);
            libbuf.writeByte(0);
        }
        /*** Write out Second Linker Member ***/
        if (libbuf.length & 1)
            libbuf.writeByte('\n');
        assert(libbuf.length == secondLinkerMemberOffset);
        om.length = cast(uint)(4 + objmodules.dim * 4 + 4 + objsymbols.dim * 2 + slength);
        MSCoffOmToHeader(&h, &om);
        libbuf.write((&h)[0 .. 1]);
        // The Second Linker Member stores its numbers little-endian
        Port.writelongLE(cast(uint)objmodules.dim, buf.ptr);
        libbuf.write(buf[0 .. 4]);
        for (size_t i = 0; i < objmodules.dim; i++)
        {
            MSCoffObjModule* om2 = objmodules[i];
            // NOTE(review): the index is 16 bits wide, so it is silently
            // truncated once there are more than 65535 modules.
            om2.index = cast(ushort)i;
            Port.writelongLE(om2.offset, buf.ptr);
            libbuf.write(buf[0 .. 4]);
        }
        Port.writelongLE(cast(uint)objsymbols.dim, buf.ptr);
        libbuf.write(buf[0 .. 4]);
        // Sort objsymbols[] in lexical order
        objsymbols.sort!(MSCoffObjSymbol.name_pred);
        for (size_t i = 0; i < objsymbols.dim; i++)
        {
            MSCoffObjSymbol* os = objsymbols[i];
            // 1-based module index; only the low two bytes are emitted
            Port.writelongLE(os.om.index + 1, buf.ptr);
            libbuf.write(buf[0 .. 2]);
        }
        for (size_t i = 0; i < objsymbols.dim; i++)
        {
            MSCoffObjSymbol* os = objsymbols[i];
            libbuf.writestring(os.name);
            libbuf.writeByte(0);
        }
        /*** Write out longnames Member ***/
        if (libbuf.length & 1)
            libbuf.writeByte('\n');
        //printf("libbuf %x longnames %x\n", (int)libbuf.length, (int)LongnamesMemberOffset);
        assert(libbuf.length == LongnamesMemberOffset);
        // header: all blanks except the "//" name, the size and the trailer
        memset(&h, ' ', MSCoffLibHeader.sizeof);
        h.object_name[0] = '/';
        h.object_name[1] = '/';
        size_t len = sprintf(h.file_size.ptr, "%u", noffset);
        assert(len < 10);
        h.file_size[len] = ' '; // replace the 0 terminator written by sprintf
        h.trailer[0] = '`';
        h.trailer[1] = '\n';
        libbuf.write((&h)[0 .. 1]);
        for (size_t i = 0; i < objmodules.dim; i++)
        {
            MSCoffObjModule* om2 = objmodules[i];
            if (om2.name_offset >= 0)
            {
                libbuf.writestring(om2.name);
                libbuf.writeByte(0);
            }
        }
        /* Write out each of the object modules
         */
        for (size_t i = 0; i < objmodules.dim; i++)
        {
            MSCoffObjModule* om2 = objmodules[i];
            if (libbuf.length & 1)
                libbuf.writeByte('\n'); // module alignment
            //printf("libbuf %x om %x\n", (int)libbuf.length, (int)om2.offset);
            assert(libbuf.length == om2.offset);
            if (om2.scan)
            {
                MSCoffOmToHeader(&h, om2);
                libbuf.write((&h)[0 .. 1]); // module header
                libbuf.write(om2.base[0 .. om2.length]); // module contents
            }
            else
            {
                // Header is included in om.base[0..length]
                libbuf.write(om2.base[0 .. om2.length]); // module contents
            }
        }
        static if (LOG)
        {
            printf("moffset = x%x, libbuf.length = x%x\n", cast(uint)moffset, cast(uint)libbuf.length);
        }
        assert(libbuf.length == moffset);
    }
}
586 
587 /*****************************************************************************/
588 /*****************************************************************************/
/// Bookkeeping for one object module that is (or will become) a member of the library.
struct MSCoffObjModule
{
    ubyte* base; // where are we holding it in memory
    uint length; // in bytes; includes the MSCoffLibHeader when the module was taken from an existing library (scan == 0)
    uint offset; // offset from start of library; assigned by LibMSCoff.WriteLibToBuffer
    ushort index; // index in Second Linker Member; assigned by LibMSCoff.WriteLibToBuffer
    const(char)[] name; // module name (file name) terminated with 0
    int name_offset; // if not -1, offset of the name within the Longnames Member
    long file_time; // file time
    uint user_id;
    uint group_id;
    uint file_mode; // written to the member header in octal
    int scan; // 1 means scan for symbols
}
603 
// Width in bytes of the `object_name` field of `MSCoffLibHeader`.
enum MSCOFF_OBJECT_NAME_SIZE = 16;

/**
 * Header that precedes every member of the library.
 *
 * `LibMSCoff.WriteLibToBuffer` asserts that it is 60 bytes in size.
 * The fields hold text; `MSCoffOmToHeader` pads them with blanks.
 */
struct MSCoffLibHeader
{
    char[MSCOFF_OBJECT_NAME_SIZE] object_name; // "name/", "/" + decimal longnames offset, "/" or "//"
    char[12] file_time; // decimal
    char[6] user_id;
    char[6] group_id;
    char[8] file_mode; // in octal
    char[10] file_size; // decimal size of the member data, excluding this header
    char[2] trailer; // always "`\n"
}
616 
/**
 * Fill in the library member header `h` from the module description `om`.
 *
 * The name field receives either the module name followed by '/' (when
 * `om.name_offset` is -1) or '/' followed by the decimal `om.name_offset`.
 * Time, mode and size are written as text and padded with blanks; the user
 * and group id fields are set to all blanks.
 *
 * Params:
 *      h = header to fill in; every field is overwritten
 *      om = module to describe; when `name_offset` is -1 its name must be
 *           shorter than `MSCOFF_OBJECT_NAME_SIZE`
 */
extern (C++) void MSCoffOmToHeader(MSCoffLibHeader* h, MSCoffObjModule* om)
{
    size_t len;
    if (om.name_offset == -1)
    {
        len = om.name.length;
        // Check before copying: the name and its '/' terminator must fit in
        // the field, otherwise the memcpy below would write past it.
        assert(len < MSCOFF_OBJECT_NAME_SIZE);
        memcpy(h.object_name.ptr, om.name.ptr, len);
        h.object_name[len] = '/';
    }
    else
    {
        len = sprintf(h.object_name.ptr, "/%d", om.name_offset);
        assert(len < MSCOFF_OBJECT_NAME_SIZE);
        h.object_name[len] = ' '; // replace the 0 terminator written by sprintf
    }
    memset(h.object_name.ptr + len + 1, ' ', MSCOFF_OBJECT_NAME_SIZE - (len + 1));
    /* In the following sprintf's, don't worry if the trailing 0
     * that sprintf writes goes off the end of the field. It will
     * write into the next field, which we will promptly overwrite
     * anyway. (So make sure to write the fields in ascending order.)
     */
    // %llu expects an unsigned argument, so cast to ulong rather than long
    len = sprintf(h.file_time.ptr, "%llu", cast(ulong)om.file_time);
    assert(len <= 12);
    memset(h.file_time.ptr + len, ' ', 12 - len);
    // Match what MS tools do (set to all blanks)
    memset(h.user_id.ptr, ' ', (h.user_id).sizeof);
    memset(h.group_id.ptr, ' ', (h.group_id).sizeof);
    len = sprintf(h.file_mode.ptr, "%o", om.file_mode);
    assert(len <= 8);
    memset(h.file_mode.ptr + len, ' ', 8 - len);
    len = sprintf(h.file_size.ptr, "%u", om.length);
    assert(len <= 10);
    memset(h.file_size.ptr + len, ' ', 10 - len);
    h.trailer[0] = '`';
    h.trailer[1] = '\n';
}