1 /**
2  * A library in the Mach-O format, used on macOS.
3  *
4  * Copyright:   Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libmach.d, _libmach.d)
8  * Documentation:  https://dlang.org/phobos/dmd_libmach.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libmach.d
10  */
11 
12 module dmd.libmach;
13 
14 import core.stdc.time;
15 import core.stdc.string;
16 import core.stdc.stdlib;
17 import core.stdc.stdio;
18 import core.stdc.config;
19 
20 version (Posix)
21 {
22     import core.sys.posix.sys.stat;
23     import core.sys.posix.unistd;
24 }
25 version (Windows)
26 {
27     import core.sys.windows.stat;
28 }
29 
30 import dmd.globals;
31 import dmd.lib;
32 import dmd.utils;
33 
34 import dmd.root.array;
35 import dmd.root.file;
36 import dmd.root.filename;
37 import dmd.root.outbuffer;
38 import dmd.root.port;
39 import dmd.root.rmem;
40 import dmd.root.string;
41 import dmd.root.stringtable;
42 
43 import dmd.scanmach;
44 
45 // Entry point (only public symbol in this module).
/**
 * Factory for the Mach-O library implementation in this module.
 *
 * Returns:
 *      a newly allocated `LibMach`, handed back through its `Library` base type
 */
public extern (C++) Library LibMach_factory()
{
    Library lib = new LibMach();
    return lib;
}
50 
51 private: // for the remainder of this module
52 
// Compile-time tracing switch: the `static if (LOG)` blocks throughout this
// module (printf diagnostics) are only compiled in when this is true.
enum LOG = false;
54 
/// One entry of the library's symbol dictionary: a symbol name together with
/// the object module it was registered for (see `LibMach.addSymbol`).
struct MachObjSymbol
{
    const(char)[] name;         // still has a terminating 0
    MachObjModule* om;          // module passed to addSymbol alongside this name
}
60 
// Growable arrays of pointers to modules / symbols, as stored by LibMach.
alias MachObjModules = Array!(MachObjModule*);
alias MachObjSymbols = Array!(MachObjSymbol*);
63 
64 final class LibMach : Library
65 {
66     MachObjModules objmodules; // MachObjModule[]
67     MachObjSymbols objsymbols; // MachObjSymbol[]
68     StringTable!(MachObjSymbol*) tab;
69 
    /// Initializes the `tab` string table by passing 14_000 to its `_init`
    /// (presumably an initial size hint -- confirm against StringTable).
    /// NOTE(review): within this file `tab` is only referenced from the
    /// disabled `version (none)` branch of `addSymbol`.
    extern (D) this()
    {
        tab._init(14_000);
    }
74 
75     /***************************************
76      * Add object module or library to the library.
77      * Examine the buffer to see which it is.
78      * If the buffer is NULL, use module_name as the file name
79      * and load the file.
80      */
81     override void addObject(const(char)[] module_name, const ubyte[] buffer)
82     {
83         static if (LOG)
84         {
85             printf("LibMach::addObject(%.*s)\n",
86                    cast(int)module_name.length, module_name.ptr);
87         }
88 
89         void corrupt(int reason)
90         {
91             error("corrupt Mach object module %.*s %d",
92                   cast(int)module_name.length, module_name.ptr, reason);
93         }
94 
95         int fromfile = 0;
96         auto buf = buffer.ptr;
97         auto buflen = buffer.length;
98         if (!buf)
99         {
100             assert(module_name[0]);
101             // read file and take buffer ownership
102             auto data = readFile(Loc.initial, module_name).extractSlice();
103             buf = data.ptr;
104             buflen = data.length;
105             fromfile = 1;
106         }
107         if (buflen < 16)
108         {
109             static if (LOG)
110             {
111                 printf("buf = %p, buflen = %d\n", buf, buflen);
112             }
113             return corrupt(__LINE__);
114         }
115         if (memcmp(buf, "!<arch>\n".ptr, 8) == 0)
116         {
117             /* Library file.
118              * Pull each object module out of the library and add it
119              * to the object module array.
120              */
121             static if (LOG)
122             {
123                 printf("archive, buf = %p, buflen = %d\n", buf, buflen);
124             }
125             uint offset = 8;
126             char* symtab = null;
127             uint symtab_size = 0;
128             uint mstart = cast(uint)objmodules.dim;
129             while (offset < buflen)
130             {
131                 if (offset + MachLibHeader.sizeof >= buflen)
132                     return corrupt(__LINE__);
133                 MachLibHeader* header = cast(MachLibHeader*)(cast(ubyte*)buf + offset);
134                 offset += MachLibHeader.sizeof;
135                 char* endptr = null;
136                 uint size = cast(uint)strtoul(header.file_size.ptr, &endptr, 10);
137                 if (endptr >= header.file_size.ptr + 10 || *endptr != ' ')
138                     return corrupt(__LINE__);
139                 if (offset + size > buflen)
140                     return corrupt(__LINE__);
141                 if (memcmp(header.object_name.ptr, "__.SYMDEF       ".ptr, 16) == 0 ||
142                     memcmp(header.object_name.ptr, "__.SYMDEF SORTED".ptr, 16) == 0)
143                 {
144                     /* Instead of rescanning the object modules we pull from a
145                      * library, just use the already created symbol table.
146                      */
147                     if (symtab)
148                         return corrupt(__LINE__);
149                     symtab = cast(char*)buf + offset;
150                     symtab_size = size;
151                     if (size < 4)
152                         return corrupt(__LINE__);
153                 }
154                 else
155                 {
156                     auto om = new MachObjModule();
157                     om.base = cast(ubyte*)buf + offset - MachLibHeader.sizeof;
158                     om.length = cast(uint)(size + MachLibHeader.sizeof);
159                     om.offset = 0;
160                     const n = cast(const(char)*)(om.base + MachLibHeader.sizeof);
161                     om.name = n.toDString();
162                     om.file_time = cast(uint)strtoul(header.file_time.ptr, &endptr, 10);
163                     om.user_id = cast(uint)strtoul(header.user_id.ptr, &endptr, 10);
164                     om.group_id = cast(uint)strtoul(header.group_id.ptr, &endptr, 10);
165                     om.file_mode = cast(uint)strtoul(header.file_mode.ptr, &endptr, 8);
166                     om.scan = 0; // don't scan object module for symbols
167                     objmodules.push(om);
168                 }
169                 offset += (size + 1) & ~1;
170             }
171             if (offset != buflen)
172                 return corrupt(__LINE__);
173             /* Scan the library's symbol table, and insert it into our own.
174              * We use this instead of rescanning the object module, because
175              * the library's creator may have a different idea of what symbols
176              * go into the symbol table than we do.
177              * This is also probably faster.
178              */
179             uint nsymbols = Port.readlongLE(symtab) / 8;
180             char* s = symtab + 4 + nsymbols * 8 + 4;
181             if (4 + nsymbols * 8 + 4 > symtab_size)
182                 return corrupt(__LINE__);
183             for (uint i = 0; i < nsymbols; i++)
184             {
185                 uint soff = Port.readlongLE(symtab + 4 + i * 8);
186                 const(char)* name = s + soff;
187                 size_t namelen = strlen(name);
188                 //printf("soff = x%x name = %s\n", soff, name);
189                 if (s + namelen + 1 - symtab > symtab_size)
190                     return corrupt(__LINE__);
191                 uint moff = Port.readlongLE(symtab + 4 + i * 8 + 4);
192                 //printf("symtab[%d] moff = x%x  x%x, name = %s\n", i, moff, moff + sizeof(Header), name);
193                 for (uint m = mstart; 1; m++)
194                 {
195                     if (m == objmodules.dim)
196                         return corrupt(__LINE__);       // didn't find it
197                     MachObjModule* om = objmodules[m];
198                     //printf("\tom offset = x%x\n", (char *)om.base - (char *)buf);
199                     if (moff == cast(char*)om.base - cast(char*)buf)
200                     {
201                         addSymbol(om, name[0 .. namelen], 1);
202                         //if (mstart == m)
203                         //    mstart++;
204                         break;
205                     }
206                 }
207             }
208             return;
209         }
210         /* It's an object module
211          */
212         auto om = new MachObjModule();
213         om.base = cast(ubyte*)buf;
214         om.length = cast(uint)buflen;
215         om.offset = 0;
216         const n = FileName.name(module_name); // remove path, but not extension
217         om.name = n;
218         om.scan = 1;
219         if (fromfile)
220         {
221             version (Posix)
222                 stat_t statbuf;
223             version (Windows)
224                 struct_stat statbuf;
225             int i = module_name.toCStringThen!(slice => stat(slice.ptr, &statbuf));
226             if (i == -1) // error, errno is set
227                 return corrupt(__LINE__);
228             om.file_time = statbuf.st_ctime;
229             om.user_id = statbuf.st_uid;
230             om.group_id = statbuf.st_gid;
231             om.file_mode = statbuf.st_mode;
232         }
233         else
234         {
235             /* Mock things up for the object module file that never was
236              * actually written out.
237              */
238             version (Posix)
239             {
240                 __gshared uid_t uid;
241                 __gshared gid_t gid;
242                 __gshared int _init;
243                 if (!_init)
244                 {
245                     _init = 1;
246                     uid = getuid();
247                     gid = getgid();
248                 }
249                 om.user_id = uid;
250                 om.group_id = gid;
251             }
252             version (Windows)
253             {
254                 om.user_id = 0; // meaningless on Windows
255                 om.group_id = 0;        // meaningless on Windows
256             }
257             time(&om.file_time);
258             om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
259         }
260         objmodules.push(om);
261     }
262 
263     /*****************************************************************************/
264 
265     void addSymbol(MachObjModule* om, const(char)[] name, int pickAny = 0)
266     {
267         static if (LOG)
268         {
269             printf("LibMach::addSymbol(%s, %s, %d)\n", om.name.ptr, name.ptr, pickAny);
270         }
271         version (none)
272         {
273             // let linker sort out duplicates
274             StringValue* s = tab.insert(name.ptr, name.length, null);
275             if (!s)
276             {
277                 // already in table
278                 if (!pickAny)
279                 {
280                     s = tab.lookup(name.ptr, name.length);
281                     assert(s);
282                     MachObjSymbol* os = cast(MachObjSymbol*)s.ptrvalue;
283                     error("multiple definition of %s: %s and %s: %s", om.name.ptr, name.ptr, os.om.name.ptr, os.name.ptr);
284                 }
285             }
286             else
287             {
288                 auto os = new MachObjSymbol();
289                 os.name = xarraydup(name);
290                 os.om = om;
291                 s.ptrvalue = cast(void*)os;
292                 objsymbols.push(os);
293             }
294         }
295         else
296         {
297             auto os = new MachObjSymbol();
298             os.name = xarraydup(name);
299             os.om = om;
300             objsymbols.push(os);
301         }
302     }
303 
304 private:
305     /************************************
306      * Scan single object module for dictionary symbols.
307      * Send those symbols to LibMach::addSymbol().
308      */
309     void scanObjModule(MachObjModule* om)
310     {
311         static if (LOG)
312         {
313             printf("LibMach::scanObjModule(%s)\n", om.name.ptr);
314         }
315 
316         extern (D) void addSymbol(const(char)[] name, int pickAny)
317         {
318             this.addSymbol(om, name, pickAny);
319         }
320 
321         scanMachObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc);
322     }
323 
324     /*****************************************************************************/
325     /*****************************************************************************/
326     /**********************************************
327      * Create and write library to libbuf.
328      * The library consists of:
329      *      !<arch>\n
330      *      header
331      *      dictionary
332      *      object modules...
333      */
334     protected override void WriteLibToBuffer(OutBuffer* libbuf)
335     {
336         static if (LOG)
337         {
338             printf("LibMach::WriteLibToBuffer()\n");
339         }
340         __gshared char* pad = [0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A, 0x0A];
341         /************* Scan Object Modules for Symbols ******************/
342         for (size_t i = 0; i < objmodules.dim; i++)
343         {
344             MachObjModule* om = objmodules[i];
345             if (om.scan)
346             {
347                 scanObjModule(om);
348             }
349         }
350         /************* Determine module offsets ******************/
351         uint moffset = 8 + MachLibHeader.sizeof + 4 + 4;
352         for (size_t i = 0; i < objsymbols.dim; i++)
353         {
354             MachObjSymbol* os = objsymbols[i];
355             moffset += 8 + os.name.length + 1;
356         }
357         moffset = (moffset + 3) & ~3;
358         //if (moffset & 4)
359         //    moffset += 4;
360         uint hoffset = moffset;
361         static if (LOG)
362         {
363             printf("\tmoffset = x%x\n", moffset);
364         }
365         for (size_t i = 0; i < objmodules.dim; i++)
366         {
367             MachObjModule* om = objmodules[i];
368             moffset += moffset & 1;
369             om.offset = moffset;
370             if (om.scan)
371             {
372                 const slen = om.name.length;
373                 int nzeros = 8 - ((slen + 4) & 7);
374                 if (nzeros < 4)
375                     nzeros += 8; // emulate mysterious behavior of ar
376                 int filesize = om.length;
377                 filesize = (filesize + 7) & ~7;
378                 moffset += MachLibHeader.sizeof + slen + nzeros + filesize;
379             }
380             else
381             {
382                 moffset += om.length;
383             }
384         }
385         libbuf.reserve(moffset);
386         /************* Write the library ******************/
387         libbuf.write("!<arch>\n");
388         MachObjModule om;
389         om.base = null;
390         om.length = cast(uint)(hoffset - (8 + MachLibHeader.sizeof));
391         om.offset = 8;
392         om.name = "";
393         .time(&om.file_time);
394         version (Posix)
395         {
396             om.user_id = getuid();
397             om.group_id = getgid();
398         }
399         version (Windows)
400         {
401             om.user_id = 0;
402             om.group_id = 0;
403         }
404         om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
405         MachLibHeader h;
406         MachOmToHeader(&h, &om);
407         memcpy(h.object_name.ptr, "__.SYMDEF".ptr, 9);
408         int len = sprintf(h.file_size.ptr, "%u", om.length);
409         assert(len <= 10);
410         memset(h.file_size.ptr + len, ' ', 10 - len);
411         libbuf.write((&h)[0 .. 1]);
412         char[4] buf;
413         Port.writelongLE(cast(uint)(objsymbols.dim * 8), buf.ptr);
414         libbuf.write(buf[0 .. 4]);
415         int stringoff = 0;
416         for (size_t i = 0; i < objsymbols.dim; i++)
417         {
418             MachObjSymbol* os = objsymbols[i];
419             Port.writelongLE(stringoff, buf.ptr);
420             libbuf.write(buf[0 .. 4]);
421             Port.writelongLE(os.om.offset, buf.ptr);
422             libbuf.write(buf[0 .. 4]);
423             stringoff += os.name.length + 1;
424         }
425         Port.writelongLE(stringoff, buf.ptr);
426         libbuf.write(buf[0 .. 4]);
427         for (size_t i = 0; i < objsymbols.dim; i++)
428         {
429             MachObjSymbol* os = objsymbols[i];
430             libbuf.writestring(os.name);
431             libbuf.writeByte(0);
432         }
433         while (libbuf.length & 3)
434             libbuf.writeByte(0);
435         //if (libbuf.length & 4)
436         //    libbuf.write(pad[0 .. 4]);
437         static if (LOG)
438         {
439             printf("\tlibbuf.moffset = x%x\n", libbuf.length);
440         }
441         assert(libbuf.length == hoffset);
442         /* Write out each of the object modules
443          */
444         for (size_t i = 0; i < objmodules.dim; i++)
445         {
446             MachObjModule* om2 = objmodules[i];
447             if (libbuf.length & 1)
448                 libbuf.writeByte('\n'); // module alignment
449             assert(libbuf.length == om2.offset);
450             if (om2.scan)
451             {
452                 MachOmToHeader(&h, om2);
453                 libbuf.write((&h)[0 .. 1]); // module header
454                 libbuf.write(om2.name.ptr[0 .. om2.name.length]);
455                 int nzeros = 8 - ((om2.name.length + 4) & 7);
456                 if (nzeros < 4)
457                     nzeros += 8; // emulate mysterious behavior of ar
458                 libbuf.fill0(nzeros);
459                 libbuf.write(om2.base[0 .. om2.length]); // module contents
460                 // obj modules are padded out to 8 bytes in length with 0x0A
461                 int filealign = om2.length & 7;
462                 if (filealign)
463                 {
464                     libbuf.write(pad[0 .. 8 - filealign]);
465                 }
466             }
467             else
468             {
469                 libbuf.write(om2.base[0 .. om2.length]); // module contents
470             }
471         }
472         static if (LOG)
473         {
474             printf("moffset = x%x, libbuf.length = x%x\n", moffset, libbuf.length);
475         }
476         assert(libbuf.length == moffset);
477     }
478 }
479 
480 /*****************************************************************************/
481 /*****************************************************************************/
/**
 * Description of one member of the library being built.
 *
 * Members copied out of an existing archive have `scan == 0`, and their
 * `base`/`length` include the member's original header (see
 * `LibMach.addObject`). Freshly added object modules have `scan == 1` and get
 * a new header from `MachOmToHeader` when the library is written.
 */
struct MachObjModule
{
    ubyte* base; // where are we holding it in memory
    uint length; // in bytes
    uint offset; // offset from start of library
    const(char)[] name; // module name (file name) with terminating 0
    c_long file_time; // file time
    uint user_id; // MachOmToHeader resets values above 999_999 to 0
    uint group_id; // MachOmToHeader resets values above 999_999 to 0
    uint file_mode; // printed in octal by MachOmToHeader
    int scan; // 1 means scan for symbols
}
494 
// Width in bytes of the object_name field of MachLibHeader.
enum MACH_OBJECT_NAME_SIZE = 16;
496 
/**
 * Header that precedes every archive member: 60 bytes of text fields.
 * `MachOmToHeader` writes each field left-justified and padded with spaces;
 * the numeric fields are decimal unless noted.
 */
struct MachLibHeader
{
    char[MACH_OBJECT_NAME_SIZE] object_name; // "#1/<n>" as written by MachOmToHeader, or "__.SYMDEF"
    char[12] file_time;
    char[6] user_id;
    char[6] group_id;
    char[8] file_mode; // in octal
    char[10] file_size;
    char[2] trailer; // set to "`\n" by MachOmToHeader
}
507 
/**
 * Fill in the archive member header `h` for object module `om`.
 *
 * The name field is written as "#1/<n>", where <n> is the length of the
 * module name plus the zero bytes that follow it; the name itself is not
 * stored in the header (LibMach.WriteLibToBuffer writes it right after the
 * header). The size field covers that name area plus the module contents
 * rounded up to a multiple of 8 bytes.
 *
 * Every field is formatted into a scratch buffer first and its length is
 * checked before it is copied into the header, so a value that is too wide
 * is caught before any byte outside its field is touched.
 *
 * Params:
 *      h = header to fill in; all fields are overwritten
 *      om = module to describe. Side effect: a `user_id` or `group_id`
 *           above 999_999 is reset to 0 in `om` because it does not fit
 *           the 6 character field.
 */
extern (C++) void MachOmToHeader(MachLibHeader* h, MachObjModule* om)
{
    // Copy `text` into `field`, left-justified and padded with spaces.
    extern (D) void putField(char[] field, const(char)[] text)
    {
        assert(text.length <= field.length);
        field[0 .. text.length] = text[];
        field[text.length .. $] = ' ';
    }

    const slen = om.name.length;
    int nzeros = 8 - ((slen + 4) & 7);
    if (nzeros < 4)
        nzeros += 8; // emulate mysterious behavior of ar

    // Large enough for any prefix plus a 64 bit number in decimal or octal.
    char[32] tmp = void;
    int len;

    len = snprintf(tmp.ptr, tmp.length, "#1/%lld", cast(long)(slen + nzeros));
    assert(len > 0 && len < tmp.length);
    putField(h.object_name[], tmp[0 .. len]);

    // %llu needs an unsigned argument; the printed value is the same bit
    // pattern the signed cast used to pass.
    len = snprintf(tmp.ptr, tmp.length, "%llu", cast(ulong)om.file_time);
    assert(len > 0 && len < tmp.length);
    putField(h.file_time[], tmp[0 .. len]);

    if (om.user_id > 999_999) // yes, it happens
        om.user_id = 0; // don't really know what to do here
    len = snprintf(tmp.ptr, tmp.length, "%u", om.user_id);
    assert(len > 0 && len < tmp.length);
    putField(h.user_id[], tmp[0 .. len]);

    if (om.group_id > 999_999) // yes, it happens
        om.group_id = 0; // don't really know what to do here
    len = snprintf(tmp.ptr, tmp.length, "%u", om.group_id);
    assert(len > 0 && len < tmp.length);
    putField(h.group_id[], tmp[0 .. len]);

    len = snprintf(tmp.ptr, tmp.length, "%o", om.file_mode);
    assert(len > 0 && len < tmp.length);
    putField(h.file_mode[], tmp[0 .. len]);

    // Round the contents up to 8 bytes in 64 bit arithmetic, so a length of
    // 2 GiB or more is not turned negative by a conversion to int.
    const ulong filesize = (cast(ulong)om.length + 7) & ~cast(ulong)7;
    len = snprintf(tmp.ptr, tmp.length, "%llu", cast(ulong)(slen + nzeros + filesize));
    assert(len > 0 && len < tmp.length);
    putField(h.file_size[], tmp[0 .. len]);

    h.trailer[0] = '`';
    h.trailer[1] = '\n';
}