1 /**
2  * A library in the Mach-O format, used on macOS.
3  *
4  * Copyright:   Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/libmach.d, _libmach.d)
8  * Documentation:  https://dlang.org/phobos/dmd_libmach.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/libmach.d
10  */
11 
12 module dmd.libmach;
13 
14 version(OSX):
15 
16 import core.stdc.time;
17 import core.stdc.string;
18 import core.stdc.stdlib;
19 import core.stdc.stdio;
20 import core.stdc.config;
21 
22 import core.sys.posix.sys.stat;
23 import core.sys.posix.unistd;
24 
25 import dmd.globals;
26 import dmd.lib;
27 import dmd.utils;
28 
29 import dmd.root.array;
30 import dmd.root.file;
31 import dmd.root.filename;
32 import dmd.root.outbuffer;
33 import dmd.root.port;
34 import dmd.root.rmem;
35 import dmd.root.string;
36 import dmd.root.stringtable;
37 
38 import dmd.scanmach;
39 
/**
 * Entry point of this module, and its only public symbol.
 *
 * Returns:
 *      a newly allocated `LibMach`, handed back through its `Library` base type
 */
public extern (C++) Library LibMach_factory()
{
    Library lib = new LibMach();
    return lib;
}
45 
46 private: // for the remainder of this module
47 
/// Compile-time switch: when `true`, the `static if (LOG)` blocks in this
/// module are compiled in and print tracing output via `printf`.
enum LOG = false;

/**
 * One entry of the library's symbol dictionary: a symbol name together with
 * the object module it was registered for.
 */
struct MachObjSymbol
{
    const(char)[] name;         // still has a terminating 0
    MachObjModule* om;          // module this symbol was added for (see LibMach.addSymbol)
}

/// Growable array of pointers to object modules.
alias MachObjModules = Array!(MachObjModule*);
/// Growable array of pointers to dictionary symbols.
alias MachObjSymbols = Array!(MachObjSymbol*);
58 
/**
 * `Library` implementation that collects Mach-O object modules (and the
 * members of existing `!<arch>` archives) and writes them out as a single
 * archive with a `__.SYMDEF` symbol table.
 */
final class LibMach : Library
{
    MachObjModules objmodules; // MachObjModule[]
    MachObjSymbols objsymbols; // MachObjSymbol[]
    StringTable!(MachObjSymbol*) tab;

    extern (D) this()
    {
        tab._init(14_000);
    }

    /***************************************
     * Add object module or library to the library.
     * Examine the buffer to see which it is.
     * If the buffer is NULL, use module_name as the file name
     * and load the file.
     *
     * Malformed input is reported through `error()` (the message carries the
     * source line of the failed check) and the call returns without adding
     * anything further.
     *
     * Params:
     *      module_name = file name of the object module or archive
     *      buffer = contents of the file, or a null slice to have it read from disk
     */
    override void addObject(const(char)[] module_name, const ubyte[] buffer)
    {
        static if (LOG)
        {
            printf("LibMach::addObject(%.*s)\n",
                   cast(int)module_name.length, module_name.ptr);
        }

        // Report a malformed input; `reason` is the __LINE__ of the failed check.
        void corrupt(int reason)
        {
            error("corrupt Mach object module %.*s %d",
                  cast(int)module_name.length, module_name.ptr, reason);
        }

        int fromfile = 0;
        auto buf = buffer.ptr;
        auto buflen = buffer.length;
        if (!buf)
        {
            assert(module_name[0]);
            // read file and take buffer ownership
            auto data = readFile(Loc.initial, module_name).extractSlice();
            buf = data.ptr;
            buflen = data.length;
            fromfile = 1;
        }
        if (buflen < 16)
        {
            static if (LOG)
            {
                printf("buf = %p, buflen = %zu\n", buf, buflen);
            }
            return corrupt(__LINE__);
        }
        if (memcmp(buf, "!<arch>\n".ptr, 8) == 0)
        {
            /* Library file.
             * Pull each object module out of the library and add it
             * to the object module array.
             */
            static if (LOG)
            {
                printf("archive, buf = %p, buflen = %zu\n", buf, buflen);
            }
            size_t offset = 8;
            char* symtab = null;
            uint symtab_size = 0;
            uint mstart = cast(uint)objmodules.dim;
            while (offset < buflen)
            {
                if (offset + MachLibHeader.sizeof >= buflen)
                    return corrupt(__LINE__);
                MachLibHeader* header = cast(MachLibHeader*)(cast(ubyte*)buf + offset);
                offset += MachLibHeader.sizeof;
                char* endptr = null;
                uint size = cast(uint)strtoul(header.file_size.ptr, &endptr, 10);
                if (endptr >= header.file_size.ptr + 10 || *endptr != ' ')
                    return corrupt(__LINE__);
                // offset < buflen is guaranteed by the header check above, so the
                // subtraction cannot underflow and the comparison cannot wrap.
                if (size > buflen - offset)
                    return corrupt(__LINE__);
                if (memcmp(header.object_name.ptr, "__.SYMDEF       ".ptr, 16) == 0 ||
                    memcmp(header.object_name.ptr, "__.SYMDEF SORTED".ptr, 16) == 0)
                {
                    /* Instead of rescanning the object modules we pull from a
                     * library, just use the already created symbol table.
                     */
                    if (symtab)
                        return corrupt(__LINE__);
                    symtab = cast(char*)buf + offset;
                    symtab_size = size;
                    if (size < 4)
                        return corrupt(__LINE__);
                }
                else
                {
                    auto om = new MachObjModule();
                    // base/length cover the member's archive header as well, so the
                    // member can later be copied to the output verbatim.
                    om.base = cast(ubyte*)buf + offset - MachLibHeader.sizeof;
                    om.length = cast(uint)(size + MachLibHeader.sizeof);
                    om.offset = 0;
                    // the member name is read from the bytes right after the header
                    const n = cast(const(char)*)(om.base + MachLibHeader.sizeof);
                    om.name = n.toDString();
                    om.file_time = cast(uint)strtoul(header.file_time.ptr, &endptr, 10);
                    om.user_id = cast(uint)strtoul(header.user_id.ptr, &endptr, 10);
                    om.group_id = cast(uint)strtoul(header.group_id.ptr, &endptr, 10);
                    om.file_mode = cast(uint)strtoul(header.file_mode.ptr, &endptr, 8);
                    om.scan = 0; // don't scan object module for symbols
                    objmodules.push(om);
                }
                // members start on even offsets
                offset += cast(size_t)size + (size & 1);
            }
            if (offset != buflen)
                return corrupt(__LINE__);
            /* Scan the library's symbol table, and insert it into our own.
             * We use this instead of rescanning the object module, because
             * the library's creator may have a different idea of what symbols
             * go into the symbol table than we do.
             * This is also probably faster.
             */
            if (!symtab)
                return corrupt(__LINE__);       // archive has no symbol table member
            const uint nsymbols = Port.readlongLE(symtab) / 8;
            /* Layout of the member: 4 byte size of the entry table, nsymbols
             * entries of 8 bytes (string offset, module offset), 4 byte size of
             * the string table, then the strings. Computed in 64 bits so a
             * hostile entry count cannot wrap the sum.
             */
            const ulong strtabStart = 4 + cast(ulong)nsymbols * 8 + 4;
            if (strtabStart > symtab_size)
                return corrupt(__LINE__);
            const size_t strtabLen = cast(size_t)(symtab_size - strtabStart);
            char* s = symtab + cast(size_t)strtabStart;
            for (uint i = 0; i < nsymbols; i++)
            {
                const uint soff = Port.readlongLE(symtab + 4 + i * 8);
                if (soff >= strtabLen)
                    return corrupt(__LINE__);
                const(char)* name = s + soff;
                // the terminating 0 must lie inside the symbol table member
                auto terminator = cast(const(char)*)memchr(name, 0, strtabLen - soff);
                if (!terminator)
                    return corrupt(__LINE__);
                const size_t namelen = cast(size_t)(terminator - name);
                //printf("soff = x%x name = %s\n", soff, name);
                uint moff = Port.readlongLE(symtab + 4 + i * 8 + 4);
                //printf("symtab[%d] moff = x%x  x%x, name = %s\n", i, moff, moff + sizeof(Header), name);
                for (uint m = mstart; 1; m++)
                {
                    if (m == objmodules.dim)
                        return corrupt(__LINE__);       // didn't find it
                    MachObjModule* om = objmodules[m];
                    //printf("\tom offset = x%x\n", (char *)om.base - (char *)buf);
                    if (moff == cast(char*)om.base - cast(char*)buf)
                    {
                        addSymbol(om, name[0 .. namelen], 1);
                        //if (mstart == m)
                        //    mstart++;
                        break;
                    }
                }
            }
            return;
        }
        /* It's an object module
         */
        auto om = new MachObjModule();
        om.base = cast(ubyte*)buf;
        om.length = cast(uint)buflen;
        om.offset = 0;
        const n = FileName.name(module_name); // remove path, but not extension
        om.name = n;
        om.scan = 1;
        if (fromfile)
        {
            stat_t statbuf;
            int i = module_name.toCStringThen!(slice => stat(slice.ptr, &statbuf));
            if (i == -1) // error, errno is set
                return corrupt(__LINE__);
            om.file_time = statbuf.st_ctime;
            om.user_id = statbuf.st_uid;
            om.group_id = statbuf.st_gid;
            om.file_mode = statbuf.st_mode;
        }
        else
        {
            /* Mock things up for the object module file that never was
             * actually written out.
             */
            __gshared uid_t uid;
            __gshared gid_t gid;
            __gshared int _init;
            if (!_init)
            {
                _init = 1;
                uid = getuid();
                gid = getgid();
            }
            time(&om.file_time);
            om.user_id = uid;
            om.group_id = gid;
            om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
        }
        objmodules.push(om);
    }

    /*****************************************************************************/

    /**
     * Record `name` as a dictionary symbol defined by `om`.
     *
     * The active branch always appends a new entry holding a copy of `name`;
     * duplicates are left for the linker to sort out, so `pickAny` is only
     * consulted by the disabled `version (none)` branch.
     *
     * Params:
     *      om = module the symbol belongs to
     *      name = symbol name
     *      pickAny = non-zero if multiple definitions are acceptable
     */
    void addSymbol(MachObjModule* om, const(char)[] name, int pickAny = 0)
    {
        static if (LOG)
        {
            printf("LibMach::addSymbol(%s, %s, %d)\n", om.name.ptr, name.ptr, pickAny);
        }
        version (none)
        {
            // let linker sort out duplicates
            StringValue* s = tab.insert(name.ptr, name.length, null);
            if (!s)
            {
                // already in table
                if (!pickAny)
                {
                    s = tab.lookup(name.ptr, name.length);
                    assert(s);
                    MachObjSymbol* os = cast(MachObjSymbol*)s.ptrvalue;
                    error("multiple definition of %s: %s and %s: %s", om.name.ptr, name.ptr, os.om.name.ptr, os.name.ptr);
                }
            }
            else
            {
                auto os = new MachObjSymbol();
                os.name = xarraydup(name);
                os.om = om;
                s.ptrvalue = cast(void*)os;
                objsymbols.push(os);
            }
        }
        else
        {
            auto os = new MachObjSymbol();
            os.name = xarraydup(name);
            os.om = om;
            objsymbols.push(os);
        }
    }

private:
    /************************************
     * Number of zero bytes written after a module name of `slen` bytes in a
     * member this class generates. The result is in the range 4 .. 11 and
     * makes `slen + result` a multiple of 8 plus 4.
     */
    extern (D) static int nameZeroPadding(size_t slen)
    {
        int nzeros = 8 - ((slen + 4) & 7);
        if (nzeros < 4)
            nzeros += 8; // emulate mysterious behavior of ar
        return nzeros;
    }

    /************************************
     * Scan single object module for dictionary symbols.
     * Send those symbols to LibMach::addSymbol().
     */
    void scanObjModule(MachObjModule* om)
    {
        static if (LOG)
        {
            printf("LibMach::scanObjModule(%s)\n", om.name.ptr);
        }

        // adapter binding `om` so the scanner only has to supply name and pickAny
        extern (D) void addSymbol(const(char)[] name, int pickAny)
        {
            this.addSymbol(om, name, pickAny);
        }

        scanMachObjModule(&addSymbol, om.base[0 .. om.length], om.name.ptr, loc);
    }

    /*****************************************************************************/
    /*****************************************************************************/
    /**********************************************
     * Create and write library to libbuf.
     * The library consists of:
     *      !<arch>\n
     *      header
     *      dictionary
     *      object modules...
     *
     * Modules with `scan` set get a freshly generated header, their name and
     * padding; modules pulled from an existing archive are copied verbatim
     * (their `base`/`length` already include the original header).
     */
    protected override void WriteLibToBuffer(OutBuffer* libbuf)
    {
        static if (LOG)
        {
            printf("LibMach::WriteLibToBuffer()\n");
        }
        // at most 7 bytes are ever needed to round a module up to 8 bytes
        static immutable char[7] pad = '\n';
        /************* Scan Object Modules for Symbols ******************/
        foreach (i; 0 .. objmodules.dim)
        {
            MachObjModule* om = objmodules[i];
            if (om.scan)
            {
                scanObjModule(om);
            }
        }
        /************* Determine module offsets ******************/
        // magic + dictionary header + entry table size word + string table size word
        uint moffset = 8 + MachLibHeader.sizeof + 4 + 4;
        foreach (i; 0 .. objsymbols.dim)
        {
            MachObjSymbol* os = objsymbols[i];
            // one 8 byte table entry plus the 0-terminated name
            moffset += 8 + os.name.length + 1;
        }
        moffset = (moffset + 3) & ~3;
        //if (moffset & 4)
        //    moffset += 4;
        uint hoffset = moffset; // end of the dictionary, start of the first module
        static if (LOG)
        {
            printf("\tmoffset = x%x\n", moffset);
        }
        foreach (i; 0 .. objmodules.dim)
        {
            MachObjModule* om = objmodules[i];
            moffset += moffset & 1;
            om.offset = moffset;
            if (om.scan)
            {
                const slen = om.name.length;
                const int nzeros = nameZeroPadding(slen);
                int filesize = om.length;
                filesize = (filesize + 7) & ~7;
                moffset += MachLibHeader.sizeof + slen + nzeros + filesize;
            }
            else
            {
                moffset += om.length;
            }
        }
        libbuf.reserve(moffset);
        /************* Write the library ******************/
        libbuf.write("!<arch>\n");
        // pseudo module describing the dictionary, used only to build its header
        MachObjModule om;
        om.base = null;
        om.length = cast(uint)(hoffset - (8 + MachLibHeader.sizeof));
        om.offset = 8;
        om.name = "";
        .time(&om.file_time);
        om.user_id = getuid();
        om.group_id = getgid();
        om.file_mode = (1 << 15) | (6 << 6) | (4 << 3) | (4 << 0); // 0100644
        MachLibHeader h;
        MachOmToHeader(&h, &om);
        memcpy(h.object_name.ptr, "__.SYMDEF".ptr, 9);
        int len = sprintf(h.file_size.ptr, "%u", om.length);
        assert(len <= 10);
        memset(h.file_size.ptr + len, ' ', 10 - len);
        // a 10 digit size makes sprintf put its terminating 0 on the trailer
        h.trailer[0] = '`';
        libbuf.write((&h)[0 .. 1]);
        char[4] buf;
        Port.writelongLE(cast(uint)(objsymbols.dim * 8), buf.ptr);
        libbuf.write(buf[0 .. 4]);
        int stringoff = 0;
        foreach (i; 0 .. objsymbols.dim)
        {
            MachObjSymbol* os = objsymbols[i];
            Port.writelongLE(stringoff, buf.ptr);
            libbuf.write(buf[0 .. 4]);
            Port.writelongLE(os.om.offset, buf.ptr);
            libbuf.write(buf[0 .. 4]);
            stringoff += os.name.length + 1;
        }
        Port.writelongLE(stringoff, buf.ptr);
        libbuf.write(buf[0 .. 4]);
        foreach (i; 0 .. objsymbols.dim)
        {
            MachObjSymbol* os = objsymbols[i];
            libbuf.writestring(os.name);
            libbuf.writeByte(0);
        }
        while (libbuf.length & 3)
            libbuf.writeByte(0);
        //if (libbuf.length & 4)
        //    libbuf.write(pad[0 .. 4]);
        static if (LOG)
        {
            printf("\tlibbuf.moffset = x%zx\n", libbuf.length);
        }
        assert(libbuf.length == hoffset);
        /* Write out each of the object modules
         */
        foreach (i; 0 .. objmodules.dim)
        {
            MachObjModule* om2 = objmodules[i];
            if (libbuf.length & 1)
                libbuf.writeByte('\n'); // module alignment
            assert(libbuf.length == om2.offset);
            if (om2.scan)
            {
                MachOmToHeader(&h, om2);
                libbuf.write((&h)[0 .. 1]); // module header
                libbuf.write(om2.name.ptr[0 .. om2.name.length]);
                libbuf.fill0(nameZeroPadding(om2.name.length));
                libbuf.write(om2.base[0 .. om2.length]); // module contents
                // obj modules are padded out to 8 bytes in length with 0x0A
                int filealign = om2.length & 7;
                if (filealign)
                {
                    libbuf.write(pad[0 .. 8 - filealign]);
                }
            }
            else
            {
                libbuf.write(om2.base[0 .. om2.length]); // module contents
            }
        }
        static if (LOG)
        {
            printf("moffset = x%x, libbuf.length = x%zx\n", moffset, libbuf.length);
        }
        assert(libbuf.length == moffset);
    }
}
455 
456 /*****************************************************************************/
457 /*****************************************************************************/
/**
 * One object module held by the library, together with the metadata that
 * goes into its archive header.
 */
struct MachObjModule
{
    ubyte* base; // where are we holding it in memory
    uint length; // in bytes
    uint offset; // offset from start of library
    const(char)[] name; // module name (file name) with terminating 0
    c_long file_time; // file time
    uint user_id;   // owner; MachOmToHeader() resets values above 999_999 to 0
    uint group_id;  // group; MachOmToHeader() resets values above 999_999 to 0
    uint file_mode; // mode bits, written to the header in octal
    int scan; // 1 means scan for symbols
}
470 
/// Width in bytes of the `object_name` field of `MachLibHeader`.
enum MACH_OBJECT_NAME_SIZE = 16;

/**
 * Header that precedes every member of the archive.
 *
 * The struct is read from and written to the archive as raw bytes, so the
 * field order and sizes are the on-disk layout. MachOmToHeader() fills each
 * text field left-aligned and pads it with spaces.
 */
struct MachLibHeader
{
    char[MACH_OBJECT_NAME_SIZE] object_name; // "#1/<n>" for generated members, "__.SYMDEF" for the dictionary
    char[12] file_time; // decimal
    char[6] user_id;    // decimal
    char[6] group_id;   // decimal
    char[8] file_mode; // in octal
    char[10] file_size; // decimal
    char[2] trailer;    // set to "`\n" by MachOmToHeader()
}
483 
/**
 * Fill in the archive member header `h` from the metadata in `om`.
 *
 * Every text field is written left-aligned and padded with spaces. The name
 * field receives `#1/<n>`, where `<n>` is the length of the module name plus
 * its zero padding, and the size field receives `<n>` plus the module length
 * rounded up to a multiple of 8.
 *
 * Params:
 *      h = header to fill in; every field is overwritten
 *      om = module to describe; `user_id` and `group_id` are reset to 0 in
 *           place when they do not fit the 6 digit header fields
 */
extern (C++) void MachOmToHeader(MachLibHeader* h, MachObjModule* om)
{
    const slen = om.name.length;
    int nzeros = 8 - ((slen + 4) & 7);
    if (nzeros < 4)
        nzeros += 8; // emulate mysterious behavior of ar
    size_t len = sprintf(h.object_name.ptr, "#1/%zu", cast(size_t)(slen + nzeros));
    // without this check an oversized value would underflow the memset length
    assert(len <= MACH_OBJECT_NAME_SIZE);
    memset(h.object_name.ptr + len, ' ', MACH_OBJECT_NAME_SIZE - len);
    /* In the following sprintf's, don't worry if the trailing 0
     * that sprintf writes goes off the end of the field. It will
     * write into the next field, which we will promptly overwrite
     * anyway. (So make sure to write the fields in ascending order.)
     */
    len = sprintf(h.file_time.ptr, "%llu", cast(ulong)om.file_time);
    assert(len <= 12);
    memset(h.file_time.ptr + len, ' ', 12 - len);
    if (om.user_id > 999_999) // yes, it happens
        om.user_id = 0; // don't really know what to do here
    len = sprintf(h.user_id.ptr, "%u", om.user_id);
    assert(len <= 6);
    memset(h.user_id.ptr + len, ' ', 6 - len);
    if (om.group_id > 999_999) // yes, it happens
        om.group_id = 0; // don't really know what to do here
    len = sprintf(h.group_id.ptr, "%u", om.group_id);
    assert(len <= 6);
    memset(h.group_id.ptr + len, ' ', 6 - len);
    len = sprintf(h.file_mode.ptr, "%o", om.file_mode);
    assert(len <= 8);
    memset(h.file_mode.ptr + len, ' ', 8 - len);
    // the recorded size covers name, zero padding and the contents rounded up to 8 bytes
    int filesize = om.length;
    filesize = (filesize + 7) & ~7;
    len = sprintf(h.file_size.ptr, "%zu", cast(size_t)(slen + nzeros + filesize));
    assert(len <= 10);
    memset(h.file_size.ptr + len, ' ', 10 - len);
    // written last: the sprintf above may have put its terminating 0 here
    h.trailer[0] = '`';
    h.trailer[1] = '\n';
}