1 /**
2  * Extract symbols from a Mach-O object file.
3  *
4  * Copyright:   Copyright (C) 1999-2021 by The D Language Foundation, All Rights Reserved
5  * Authors:     $(LINK2 http://www.digitalmars.com, Walter Bright)
6  * License:     $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/scanmach.d, _scanmach.d)
8  * Documentation:  https://dlang.org/phobos/dmd_scanmach.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/scanmach.d
10  */
11 
12 module dmd.scanmach;
13 
14 import core.stdc.string;
15 import core.stdc.stdint;
16 import dmd.globals;
17 import dmd.errors;
18 
19 //import core.sys.darwin.mach.loader;
20 import dmd.backend.mach;
21 
/// Compile-time switch: when true, scanMachObjModule prints a trace line on entry.
private enum bool LOG = false;
23 
/*****************************************
 * Reads an object module from base[] and passes the names
 * of any exported symbols to pAddSymbol().
 *
 * Every offset and size taken from the file (load command sizes, symbol
 * table and string table extents, per-symbol name offsets) is checked
 * against the bounds of base[] before it is dereferenced, so a truncated
 * or malformed module is reported as corrupt instead of being read out
 * of bounds.
 *
 * Params:
 *      pAddSymbol =  function to pass the names to
 *      base =        array of contents of object module
 *      module_name = name of the object module (used for error messages)
 *      loc =         location to use for error printing
 */
void scanMachObjModule(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const(ubyte)[] base, const(char)* module_name, Loc loc)
{
    static if (LOG)
    {
        import core.stdc.stdio : printf;
        printf("scanMachObjModule(%s)\n", module_name);
    }

    // Reports the module as corrupt; `reason` is the source line of the failed check.
    void corrupt(int reason)
    {
        error(loc, "corrupt Mach-O object module `%s` %d", module_name, reason);
    }

    const buf = base.ptr;
    const size_t buflen = base.length;

    /* First do sanity checks on object file
     */
    if (buflen < mach_header.sizeof)
        return corrupt(__LINE__);

    auto header = cast(const(mach_header)*)buf;
    const bool is64 = header.magic == MH_MAGIC_64;

    uint32_t ncmds;         // number of load commands
    uint32_t sizeofcmds;    // total size in bytes of all load commands
    size_t headerSize;      // size of the header variant in use

    if (header.magic == MH_MAGIC)
    {
        if (header.cputype != CPU_TYPE_I386)
        {
            error(loc, "Mach-O object module `%s` has cputype = %d, should be %d", module_name, header.cputype, CPU_TYPE_I386);
            return;
        }
        if (header.filetype != MH_OBJECT)
        {
            error(loc, "Mach-O object module `%s` has file type = %d, should be %d", module_name, header.filetype, MH_OBJECT);
            return;
        }
        ncmds = header.ncmds;
        sizeofcmds = header.sizeofcmds;
        headerSize = mach_header.sizeof;
    }
    else if (is64)
    {
        // The 64-bit header is larger; make sure all of it is present before reading it
        if (buflen < mach_header_64.sizeof)
            return corrupt(__LINE__);
        auto header64 = cast(const(mach_header_64)*)buf;
        if (header64.cputype != CPU_TYPE_X86_64)
        {
            error(loc, "Mach-O object module `%s` has cputype = %d, should be %d", module_name, header64.cputype, CPU_TYPE_X86_64);
            return;
        }
        if (header64.filetype != MH_OBJECT)
        {
            error(loc, "Mach-O object module `%s` has file type = %d, should be %d", module_name, header64.filetype, MH_OBJECT);
            return;
        }
        ncmds = header64.ncmds;
        sizeofcmds = header64.sizeofcmds;
        headerSize = mach_header_64.sizeof;
    }
    else
        return corrupt(__LINE__);

    // The load commands must lie entirely inside the buffer.
    // 64-bit arithmetic so the sum cannot wrap where size_t is 32 bits.
    const ulong cmdsLimit = cast(ulong)headerSize + sizeofcmds;
    if (cmdsLimit > buflen)
        return corrupt(__LINE__);
    const size_t cmdsEnd = cast(size_t)cmdsLimit;

    // Commands immediately follow the header. Remember the last LC_SYMTAB seen.
    const(symtab_command)* symtab = null;
    size_t offset = headerSize;     // invariant: offset <= cmdsEnd
    foreach (i; 0 .. ncmds)
    {
        if (cmdsEnd - offset < load_command.sizeof)
            return corrupt(__LINE__);
        auto command = cast(const(load_command)*)(buf + offset);
        //printf("cmd = 0x%02x, cmdsize = %u\n", command.cmd, command.cmdsize);
        const size_t cmdsize = command.cmdsize;
        // A command must at least hold its own prefix and must not run past
        // the load-command area
        if (cmdsize < load_command.sizeof || cmdsize > cmdsEnd - offset)
            return corrupt(__LINE__);
        if (command.cmd == LC_SYMTAB)
        {
            if (cmdsize < symtab_command.sizeof)
                return corrupt(__LINE__);
            symtab = cast(const(symtab_command)*)command;
        }
        offset += cmdsize;
    }

    if (!symtab)
        return;     // no symbol table, nothing to export

    // Get the string table as a bounds-checked slice
    const size_t stroff = symtab.stroff;
    const ulong strEnd = cast(ulong)symtab.stroff + symtab.strsize;
    if (strEnd > buflen)
        return corrupt(__LINE__);
    const(char)[] strtab = cast(const(char)[])base[stroff .. cast(size_t)strEnd];

    // Walk the symbol table using the entry layout matching the header variant
    const int reason = is64
        ? scanSymbols!nlist_64(pAddSymbol, base, symtab.symoff, symtab.nsyms, strtab)
        : scanSymbols!nlist(pAddSymbol, base, symtab.symoff, symtab.nsyms, strtab);
    if (reason)
        corrupt(reason);
}

/*****************************************
 * Walks a symbol table made of NList entries and passes the name of every
 * exported symbol to pAddSymbol().
 * Params:
 *      NList =      symbol table entry type, `nlist` or `nlist_64`
 *      pAddSymbol = function to pass the names to
 *      base =       array of contents of object module
 *      symoff =     offset in base[] of the first symbol table entry
 *      nsyms =      number of symbol table entries
 *      strtab =     string table holding the symbol names
 * Returns:
 *      0 on success, otherwise the source line of the consistency check that failed
 */
private int scanSymbols(NList)(void delegate(const(char)[] name, int pickAny) pAddSymbol,
        const(ubyte)[] base, size_t symoff, size_t nsyms, const(char)[] strtab)
{
    // 64-bit arithmetic so the extent cannot wrap where size_t is 32 bits
    const ulong symEnd = symoff + cast(ulong)nsyms * NList.sizeof;
    if (symEnd > base.length)
        return __LINE__;

    auto symbols = (cast(const(NList)*)(base.ptr + symoff))[0 .. nsyms];
    foreach (ref s; symbols)
    {
        if (s.n_type & N_STAB)
        {
            // values in /usr/include/mach-o/stab.h
            continue;
        }

        bool exported;
        switch (s.n_type & N_TYPE)
        {
        case N_UNDF:
            exported = (s.n_type & N_EXT) && s.n_value != 0; // comdef
            break;
        case N_SECT:
            exported = (s.n_type & N_EXT) != 0; /*&& !(s.n_desc & N_REF_TO_WEAK)*/
            break;
        default:    // N_ABS, N_PBUD, N_INDR and anything else
            exported = false;
            break;
        }
        if (!exported)
            continue;

        // The name must start inside the string table and be NUL terminated
        // before the table ends. A negative 32-bit n_strx becomes a huge
        // unsigned offset here and is rejected by the same range check.
        const size_t strx = cast(uint)s.n_strx;
        if (strx >= strtab.length)
            return __LINE__;
        auto nul = cast(const(char)*)memchr(strtab.ptr + strx, 0, strtab.length - strx);
        if (!nul)
            return __LINE__;
        pAddSymbol(strtab[strx .. nul - strtab.ptr], 1);
    }
    return 0;
}
216 
217 private: // for the remainder of this module
218 
/// `cputype` values that scanMachObjModule accepts in the Mach-O header.
enum
{
    CPU_TYPE_I386   = 7,                            // required for MH_MAGIC modules
    CPU_TYPE_X86_64 = CPU_TYPE_I386 | 0x1000000,    // required for MH_MAGIC_64 modules
}

/// The only header `filetype` that scanMachObjModule accepts.
enum int MH_OBJECT = 0x1;
223 
/**
 * Layout of a segment load command with 32-bit address and size fields.
 * Not referenced by scanMachObjModule.
 */
struct segment_command
{
    uint32_t cmd, cmdsize;          // same leading fields as load_command
    char[16] segname;
    uint32_t vmaddr, vmsize;
    uint32_t fileoff, filesize;
    int32_t  maxprot, initprot;
    uint32_t nsects, flags;
}
238 
/**
 * Layout of a segment load command with 64-bit address and size fields.
 * Not referenced by scanMachObjModule.
 */
struct segment_command_64
{
    uint32_t cmd, cmdsize;          // same leading fields as load_command
    char[16] segname;
    uint64_t vmaddr, vmsize;
    uint64_t fileoff, filesize;
    int32_t  maxprot, initprot;
    uint32_t nsects, flags;
}
253 
/**
 * Layout of the LC_SYMTAB load command, which scanMachObjModule uses to
 * locate the symbol table and the string table inside the module.
 */
struct symtab_command
{
    uint32_t cmd, cmdsize;      // same leading fields as load_command
    uint32_t symoff, nsyms;     // symbol table: byte offset from start of module, entry count
    uint32_t stroff, strsize;   // string table: byte offset from start of module, size in bytes
}
263 
/**
 * Layout of the LC_DYSYMTAB load command: the two load_command fields
 * followed by eighteen 32-bit fields. Not referenced by scanMachObjModule.
 */
struct dysymtab_command
{
    uint32_t cmd, cmdsize;                  // same leading fields as load_command
    uint32_t ilocalsym, nlocalsym;
    uint32_t iextdefsym, nextdefsym;
    uint32_t iundefsym, nundefsym;
    uint32_t tocoff, ntoc;
    uint32_t modtaboff, nmodtab;
    uint32_t extrefsymoff, nextrefsyms;
    uint32_t indirectsymoff, nindirectsyms;
    uint32_t extreloff, nextrel;
    uint32_t locreloff, nlocrel;
}
287 
/// Values of load_command.cmd. scanMachObjModule only looks for LC_SYMTAB.
enum
{
    LC_SEGMENT    = 1,
    LC_SYMTAB     = 2,
    LC_DYSYMTAB   = 11,
    LC_SEGMENT_64 = 0x19,
}
292 
/**
 * Fields found at the start of every load command. scanMachObjModule
 * compares `cmd` with LC_SYMTAB and advances by `cmdsize` bytes to reach
 * the next command.
 */
struct load_command
{
    uint32_t cmd, cmdsize;
}
298 
/// Bit masks applied to the `n_type` field of a symbol table entry.
enum
{
    N_EXT  = 1,     // set on symbols that scanMachObjModule may export
    N_STAB = 0xE0,  // an entry with any of these bits set is skipped
    N_PEXT = 0x10,  // only mentioned in disabled (version (none)) code
    N_TYPE = 0x0E,  // selects the type value switched on below
}

/// Values of `n_type & N_TYPE`. Only N_UNDF and N_SECT entries can be exported.
enum
{
    N_UNDF = 0,     // exported when N_EXT is set and n_value != 0 (comdef)
    N_ABS  = 2,     // ignored
    N_INDR = 10,    // ignored
    N_PBUD = 12,    // ignored
    N_SECT = 14,    // exported when N_EXT is set
}
308 
/**
 * Symbol table entry layout used for MH_MAGIC (32-bit) modules.
 */
struct nlist
{
    int32_t  n_strx;            // offset of the symbol's name within the string table
    uint8_t  n_type, n_sect;    // n_type is tested with the N_* masks; n_sect is not read here
    int16_t  n_desc;            // not read by scanMachObjModule
    uint32_t n_value;           // non-zero on an external N_UNDF entry marks a comdef
}
317 
/**
 * Symbol table entry layout used for MH_MAGIC_64 (64-bit) modules.
 */
struct nlist_64
{
    uint32_t n_strx;            // offset of the symbol's name within the string table
    uint8_t  n_type, n_sect;    // n_type is tested with the N_* masks; n_sect is not read here
    uint16_t n_desc;            // not read by scanMachObjModule
    uint64_t n_value;           // non-zero on an external N_UNDF entry marks a comdef
}