1 /** 2 * Try to detect typos in identifiers. 3 * 4 * Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved 5 * Authors: Walter Bright, http://www.digitalmars.com 6 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 7 * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/speller.d, root/_speller.d) 8 * Documentation: https://dlang.org/phobos/dmd_root_speller.html 9 * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/speller.d 10 */ 11 12 module dmd.root.speller; 13 14 import core.stdc.stdlib; 15 import core.stdc.string; 16 17 immutable string idchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; 18 19 /************************************************** 20 * combine a new result from the spell checker to 21 * find the one with the closest symbol with 22 * respect to the cost defined by the search function 23 * Input/Output: 24 * p best found spelling (NULL if none found yet) 25 * cost cost of p (int.max if none found yet) 26 * Input: 27 * np new found spelling (NULL if none found) 28 * ncost cost of np if non-NULL 29 * Returns: 30 * true if the cost is less or equal 0 31 * false otherwise 32 */ 33 private bool combineSpellerResult(T)(ref T p, ref int cost, T np, int ncost) 34 { 35 if (np && ncost < cost) 36 { 37 p = np; 38 cost = ncost; 39 if (cost <= 0) 40 return true; 41 } 42 return false; 43 } 44 45 private auto spellerY(alias dg)(const(char)[] seed, size_t index, ref int cost) 46 { 47 if (!seed.length) 48 return null; 49 char[30] tmp; 50 char[] buf; 51 if (seed.length <= tmp.sizeof - 1) 52 buf = tmp; 53 else 54 { 55 buf = (cast(char*)alloca(seed.length + 1))[0 .. seed.length + 1]; // leave space for extra char 56 if (!buf.ptr) 57 return null; // no matches 58 } 59 buf[0 .. index] = seed[0 .. index]; 60 cost = int.max; 61 searchFunctionType!dg p = null; 62 int ncost; 63 /* Delete at seed[index] */ 64 if (index < seed.length) 65 { 66 buf[index .. seed.length - 1] = seed[index + 1 .. $]; 67 auto np = dg(buf[0 .. seed.length - 1], ncost); 68 if (combineSpellerResult(p, cost, np, ncost)) 69 return p; 70 } 71 /* Substitutions */ 72 if (index < seed.length) 73 { 74 buf[0 .. seed.length] = seed; 75 foreach (s; idchars) 76 { 77 buf[index] = s; 78 //printf("sub buf = '%s'\n", buf); 79 auto np = dg(buf[0 .. seed.length], ncost); 80 if (combineSpellerResult(p, cost, np, ncost)) 81 return p; 82 } 83 } 84 /* Insertions */ 85 buf[index + 1 .. seed.length + 1] = seed[index .. $]; 86 foreach (s; idchars) 87 { 88 buf[index] = s; 89 //printf("ins buf = '%s'\n", buf); 90 auto np = dg(buf[0 .. seed.length + 1], ncost); 91 if (combineSpellerResult(p, cost, np, ncost)) 92 return p; 93 } 94 return p; // return "best" result 95 } 96 97 private auto spellerX(alias dg)(const(char)[] seed, bool flag) 98 { 99 if (!seed.length) 100 return null; 101 char[30] tmp; 102 char[] buf; 103 if (seed.length <= tmp.sizeof - 1) 104 buf = tmp; 105 else 106 { 107 buf = (cast(char*)alloca(seed.length + 1))[0 .. seed.length + 1]; // leave space for extra char 108 } 109 int cost = int.max, ncost; 110 searchFunctionType!dg p = null, np; 111 /* Deletions */ 112 buf[0 .. seed.length - 1] = seed[1 .. $]; 113 for (size_t i = 0; i < seed.length; i++) 114 { 115 //printf("del buf = '%s'\n", buf); 116 if (flag) 117 np = spellerY!dg(buf[0 .. seed.length - 1], i, ncost); 118 else 119 np = dg(buf[0 .. seed.length - 1], ncost); 120 if (combineSpellerResult(p, cost, np, ncost)) 121 return p; 122 buf[i] = seed[i]; 123 } 124 /* Transpositions */ 125 if (!flag) 126 { 127 buf[0 .. seed.length] = seed; 128 for (size_t i = 0; i + 1 < seed.length; i++) 129 { 130 // swap [i] and [i + 1] 131 buf[i] = seed[i + 1]; 132 buf[i + 1] = seed[i]; 133 //printf("tra buf = '%s'\n", buf); 134 if (combineSpellerResult(p, cost, dg(buf[0 .. seed.length], ncost), ncost)) 135 return p; 136 buf[i] = seed[i]; 137 } 138 } 139 /* Substitutions */ 140 buf[0 .. seed.length] = seed; 141 for (size_t i = 0; i < seed.length; i++) 142 { 143 foreach (s; idchars) 144 { 145 buf[i] = s; 146 //printf("sub buf = '%s'\n", buf); 147 if (flag) 148 np = spellerY!dg(buf[0 .. seed.length], i + 1, ncost); 149 else 150 np = dg(buf[0 .. seed.length], ncost); 151 if (combineSpellerResult(p, cost, np, ncost)) 152 return p; 153 } 154 buf[i] = seed[i]; 155 } 156 /* Insertions */ 157 buf[1 .. seed.length + 1] = seed; 158 for (size_t i = 0; i <= seed.length; i++) // yes, do seed.length+1 iterations 159 { 160 foreach (s; idchars) 161 { 162 buf[i] = s; 163 //printf("ins buf = '%s'\n", buf); 164 if (flag) 165 np = spellerY!dg(buf[0 .. seed.length + 1], i + 1, ncost); 166 else 167 np = dg(buf[0 .. seed.length + 1], ncost); 168 if (combineSpellerResult(p, cost, np, ncost)) 169 return p; 170 } 171 if (i < seed.length) 172 buf[i] = seed[i]; 173 } 174 return p; // return "best" result 175 } 176 177 /************************************************** 178 * Looks for correct spelling. 179 * Currently only looks a 'distance' of one from the seed[]. 180 * This does an exhaustive search, so can potentially be very slow. 181 * Params: 182 * seed = wrongly spelled word 183 * dg = search delegate 184 * Returns: 185 * null = no correct spellings found, otherwise 186 * the value returned by dg() for first possible correct spelling 187 */ 188 auto speller(alias dg)(const(char)[] seed) 189 if (isSearchFunction!dg) 190 { 191 size_t maxdist = seed.length < 4 ? seed.length / 2 : 2; 192 for (int distance = 0; distance < maxdist; distance++) 193 { 194 auto p = spellerX!dg(seed, distance > 0); 195 if (p) 196 return p; 197 // if (seedlen > 10) 198 // break; 199 } 200 return null; // didn't find it 201 } 202 203 enum isSearchFunction(alias fun) = is(searchFunctionType!fun); 204 alias searchFunctionType(alias fun) = typeof(() {int x; return fun("", x);}()); 205 206 unittest 207 { 208 static immutable string[][] cases = 209 [ 210 ["hello", "hell", "y"], 211 ["hello", "hel", "y"], 212 ["hello", "ello", "y"], 213 ["hello", "llo", "y"], 214 ["hello", "hellox", "y"], 215 ["hello", "helloxy", "y"], 216 ["hello", "xhello", "y"], 217 ["hello", "xyhello", "y"], 218 ["hello", "ehllo", "y"], 219 ["hello", "helol", "y"], 220 ["hello", "abcd", "n"], 221 ["hello", "helxxlo", "y"], 222 ["hello", "ehlxxlo", "n"], 223 ["hello", "heaao", "y"], 224 ["_123456789_123456789_123456789_123456789", "_123456789_123456789_123456789_12345678", "y"], 225 ]; 226 //printf("unittest_speller()\n"); 227 228 string dgarg; 229 230 string speller_test(const(char)[] s, ref int cost) 231 { 232 assert(s[$-1] != '\0'); 233 //printf("speller_test(%s, %s)\n", dgarg, s); 234 cost = 0; 235 if (dgarg == s) 236 return dgarg; 237 return null; 238 } 239 240 dgarg = "hell"; 241 auto p = speller!speller_test("hello"); 242 assert(p !is null); 243 foreach (testCase; cases) 244 { 245 //printf("case [%d]\n", i); 246 dgarg = testCase[1]; 247 auto p2 = speller!speller_test(testCase[0]); 248 if (p2) 249 assert(testCase[2][0] == 'y'); 250 else 251 assert(testCase[2][0] == 'n'); 252 } 253 //printf("unittest_speller() success\n"); 254 }