1 /**
2  * Try to detect typos in identifiers.
3  *
4  * Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
5  * Authors:   Walter Bright, http://www.digitalmars.com
6  * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7  * Source:    $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/speller.d, root/_speller.d)
8  * Documentation:  https://dlang.org/phobos/dmd_root_speller.html
9  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/speller.d
10  */
11 
12 module dmd.root.speller;
13 
14 import core.stdc.stdlib;
15 import core.stdc..string;
16 
17 immutable string idchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
18 
19 /**************************************************
20  * combine a new result from the spell checker to
21  * find the one with the closest symbol with
22  * respect to the cost defined by the search function
23  * Input/Output:
24  *      p       best found spelling (NULL if none found yet)
25  *      cost    cost of p (int.max if none found yet)
26  * Input:
27  *      np      new found spelling (NULL if none found)
28  *      ncost   cost of np if non-NULL
29  * Returns:
30  *      true    if the cost is less or equal 0
31  *      false   otherwise
32  */
33 private bool combineSpellerResult(T)(ref T p, ref int cost, T np, int ncost)
34 {
35     if (np && ncost < cost)
36     {
37         p = np;
38         cost = ncost;
39         if (cost <= 0)
40             return true;
41     }
42     return false;
43 }
44 
45 private auto spellerY(alias dg)(const(char)[] seed, size_t index, ref int cost)
46 {
47     if (!seed.length)
48         return null;
49     char[30] tmp;
50     char[] buf;
51     if (seed.length <= tmp.sizeof - 1)
52         buf = tmp;
53     else
54     {
55         buf = (cast(char*)alloca(seed.length + 1))[0 .. seed.length + 1]; // leave space for extra char
56         if (!buf.ptr)
57             return null; // no matches
58     }
59     buf[0 .. index] = seed[0 .. index];
60     cost = int.max;
61     searchFunctionType!dg p = null;
62     int ncost;
63     /* Delete at seed[index] */
64     if (index < seed.length)
65     {
66         buf[index .. seed.length - 1] = seed[index + 1 .. $];
67         auto np = dg(buf[0 .. seed.length - 1], ncost);
68         if (combineSpellerResult(p, cost, np, ncost))
69             return p;
70     }
71     /* Substitutions */
72     if (index < seed.length)
73     {
74         buf[0 .. seed.length] = seed;
75         foreach (s; idchars)
76         {
77             buf[index] = s;
78             //printf("sub buf = '%s'\n", buf);
79             auto np = dg(buf[0 .. seed.length], ncost);
80             if (combineSpellerResult(p, cost, np, ncost))
81                 return p;
82         }
83     }
84     /* Insertions */
85     buf[index + 1 .. seed.length + 1] = seed[index .. $];
86     foreach (s; idchars)
87     {
88         buf[index] = s;
89         //printf("ins buf = '%s'\n", buf);
90         auto np = dg(buf[0 .. seed.length + 1], ncost);
91         if (combineSpellerResult(p, cost, np, ncost))
92             return p;
93     }
94     return p; // return "best" result
95 }
96 
97 private auto spellerX(alias dg)(const(char)[] seed, bool flag)
98 {
99     if (!seed.length)
100         return null;
101     char[30] tmp;
102     char[] buf;
103     if (seed.length <= tmp.sizeof - 1)
104         buf = tmp;
105     else
106     {
107         buf = (cast(char*)alloca(seed.length + 1))[0 .. seed.length + 1]; // leave space for extra char
108     }
109     int cost = int.max, ncost;
110     searchFunctionType!dg p = null, np;
111     /* Deletions */
112     buf[0 .. seed.length - 1] = seed[1 .. $];
113     for (size_t i = 0; i < seed.length; i++)
114     {
115         //printf("del buf = '%s'\n", buf);
116         if (flag)
117             np = spellerY!dg(buf[0 .. seed.length - 1], i, ncost);
118         else
119             np = dg(buf[0 .. seed.length - 1], ncost);
120         if (combineSpellerResult(p, cost, np, ncost))
121             return p;
122         buf[i] = seed[i];
123     }
124     /* Transpositions */
125     if (!flag)
126     {
127         buf[0 .. seed.length] = seed;
128         for (size_t i = 0; i + 1 < seed.length; i++)
129         {
130             // swap [i] and [i + 1]
131             buf[i] = seed[i + 1];
132             buf[i + 1] = seed[i];
133             //printf("tra buf = '%s'\n", buf);
134             if (combineSpellerResult(p, cost, dg(buf[0 .. seed.length], ncost), ncost))
135                 return p;
136             buf[i] = seed[i];
137         }
138     }
139     /* Substitutions */
140     buf[0 .. seed.length] = seed;
141     for (size_t i = 0; i < seed.length; i++)
142     {
143         foreach (s; idchars)
144         {
145             buf[i] = s;
146             //printf("sub buf = '%s'\n", buf);
147             if (flag)
148                 np = spellerY!dg(buf[0 .. seed.length], i + 1, ncost);
149             else
150                 np = dg(buf[0 .. seed.length], ncost);
151             if (combineSpellerResult(p, cost, np, ncost))
152                 return p;
153         }
154         buf[i] = seed[i];
155     }
156     /* Insertions */
157     buf[1 .. seed.length + 1] = seed;
158     for (size_t i = 0; i <= seed.length; i++) // yes, do seed.length+1 iterations
159     {
160         foreach (s; idchars)
161         {
162             buf[i] = s;
163             //printf("ins buf = '%s'\n", buf);
164             if (flag)
165                 np = spellerY!dg(buf[0 .. seed.length + 1], i + 1, ncost);
166             else
167                 np = dg(buf[0 .. seed.length + 1], ncost);
168             if (combineSpellerResult(p, cost, np, ncost))
169                 return p;
170         }
171         if (i < seed.length)
172             buf[i] = seed[i];
173     }
174     return p; // return "best" result
175 }
176 
177 /**************************************************
178  * Looks for correct spelling.
179  * Currently only looks a 'distance' of one from the seed[].
180  * This does an exhaustive search, so can potentially be very slow.
181  * Params:
182  *      seed = wrongly spelled word
183  *      dg = search delegate
184  * Returns:
185  *      null = no correct spellings found, otherwise
186  *      the value returned by dg() for first possible correct spelling
187  */
188 auto speller(alias dg)(const(char)[] seed)
189 if (isSearchFunction!dg)
190 {
191     size_t maxdist = seed.length < 4 ? seed.length / 2 : 2;
192     for (int distance = 0; distance < maxdist; distance++)
193     {
194         auto p = spellerX!dg(seed, distance > 0);
195         if (p)
196             return p;
197         //      if (seedlen > 10)
198         //          break;
199     }
200     return null; // didn't find it
201 }
202 
203 enum isSearchFunction(alias fun) = is(searchFunctionType!fun);
204 alias searchFunctionType(alias fun) = typeof(() {int x; return fun("", x);}());
205 
206 unittest
207 {
208     static immutable string[][] cases =
209     [
210         ["hello", "hell", "y"],
211         ["hello", "hel", "y"],
212         ["hello", "ello", "y"],
213         ["hello", "llo", "y"],
214         ["hello", "hellox", "y"],
215         ["hello", "helloxy", "y"],
216         ["hello", "xhello", "y"],
217         ["hello", "xyhello", "y"],
218         ["hello", "ehllo", "y"],
219         ["hello", "helol", "y"],
220         ["hello", "abcd", "n"],
221         ["hello", "helxxlo", "y"],
222         ["hello", "ehlxxlo", "n"],
223         ["hello", "heaao", "y"],
224         ["_123456789_123456789_123456789_123456789", "_123456789_123456789_123456789_12345678", "y"],
225     ];
226     //printf("unittest_speller()\n");
227 
228     string dgarg;
229 
230     string speller_test(const(char)[] s, ref int cost)
231     {
232         assert(s[$-1] != '\0');
233         //printf("speller_test(%s, %s)\n", dgarg, s);
234         cost = 0;
235         if (dgarg == s)
236             return dgarg;
237         return null;
238     }
239 
240     dgarg = "hell";
241     auto p = speller!speller_test("hello");
242     assert(p !is null);
243     foreach (testCase; cases)
244     {
245         //printf("case [%d]\n", i);
246         dgarg = testCase[1];
247         auto p2 = speller!speller_test(testCase[0]);
248         if (p2)
249             assert(testCase[2][0] == 'y');
250         else
251             assert(testCase[2][0] == 'n');
252     }
253     //printf("unittest_speller() success\n");
254 }