[1014] | 1 | package com.swabunga.spell.engine; |
---|
| 2 | |
---|
| 3 | |
---|
| 4 | /** |
---|
| 5 | * A phonetic encoding algorithm that takes an English word and computes a phonetic version of it. This |
---|
| 6 | * allows for phonetic matches in a spell checker. This class is a port of the C++ DoubleMetaphone() class, |
---|
| 7 | * which was intended to return two possible phonetic translations for certain words, although the Java version |
---|
| 8 | * only seems to be concerned with one, making the "double" part erroneous. |
---|
| 9 | * <br> |
---|
| 10 | * source code for the original C++ can be found |
---|
| 11 | * here: <a href="http://aspell.sourceforge.net/metaphone/"/>http://aspell.sourceforge.net/metaphone/</a> |
---|
| 12 | * DoubleMetaphone does some processing, such as uppercasing, on the input string first to normalize it. Then, to |
---|
| 13 | * create the key, the function traverses the input string in a while loop, sending sucessive characters into a giant |
---|
| 14 | * switch statement. Before determining the appropriate pronunciation, the algorithm considers the context |
---|
| 15 | * surrounding each character within the input string. |
---|
| 16 | * <p> |
---|
| 17 | * Things that were changed: |
---|
| 18 | * <br/>The alternate flag could be set to true but was never checked so why bother with it. REMOVED |
---|
| 19 | * <br/>Why was this class serializable? |
---|
| 20 | * <br/>The primary, in, length and last variables could be initialized and local to the |
---|
| 21 | * process method and references passed arround the appropriate methods. As such there are |
---|
| 22 | * no class variables and this class becomes firstly threadsafe and secondly could be static final. |
---|
| 23 | * <br/>The function call SlavoGermaic was called repeatedly in the process function, it is now only called once. |
---|
| 24 | * |
---|
| 25 | */ |
---|
| 26 | public class DoubleMeta implements Transformator { |
---|
| 27 | |
---|
| 28 | /** |
---|
| 29 | * The replace list is used in the getSuggestions method. |
---|
| 30 | * All of the letters in the misspelled word are replaced with the characters from |
---|
| 31 | * this list to try and generate more suggestions, which implies l*n tries, |
---|
| 32 | * if l is the size of the string, and n is the size of this list. |
---|
| 33 | * |
---|
| 34 | * In addition to that, each of these letters is added to the mispelled word. |
---|
| 35 | */ |
---|
| 36 | private static char[] replaceList = {'A', 'B', 'X', 'S', 'K', 'J', 'T', 'F', 'H', 'L', 'M', 'N', 'P', 'R', '0'}; |
---|
| 37 | |
---|
| 38 | |
---|
| 39 | private static final String[] myList = {"GN", "KN", "PN", "WR", "PS", ""}; |
---|
| 40 | private static final String[] list1 = {"ACH", ""}; |
---|
| 41 | private static final String[] list2 = {"BACHER", "MACHER", ""}; |
---|
| 42 | private static final String[] list3 = {"CAESAR", ""}; |
---|
| 43 | private static final String[] list4 = {"CHIA", ""}; |
---|
| 44 | private static final String[] list5 = {"CH", ""}; |
---|
| 45 | private static final String[] list6 = {"CHAE", ""}; |
---|
| 46 | private static final String[] list7 = {"HARAC", "HARIS", ""}; |
---|
| 47 | private static final String[] list8 = {"HOR", "HYM", "HIA", "HEM", ""}; |
---|
| 48 | private static final String[] list9 = {"CHORE", ""}; |
---|
| 49 | private static final String[] list10 = {"VAN ", "VON ", ""}; |
---|
| 50 | private static final String[] list11 = {"SCH", ""}; |
---|
| 51 | private static final String[] list12 = {"ORCHES", "ARCHIT", "ORCHID", ""}; |
---|
| 52 | private static final String[] list13 = {"T", "S", ""}; |
---|
| 53 | private static final String[] list14 = {"A", "O", "U", "E", ""}; |
---|
| 54 | private static final String[] list15 = {"L", "R", "N", "M", "B", "H", "F", "V", "W", " ", ""}; |
---|
| 55 | private static final String[] list16 = {"MC", ""}; |
---|
| 56 | private static final String[] list17 = {"CZ", ""}; |
---|
| 57 | private static final String[] list18 = {"WICZ", ""}; |
---|
| 58 | private static final String[] list19 = {"CIA", ""}; |
---|
| 59 | private static final String[] list20 = {"CC", ""}; |
---|
| 60 | private static final String[] list21 = {"I", "E", "H", ""}; |
---|
| 61 | private static final String[] list22 = {"HU", ""}; |
---|
| 62 | private static final String[] list23 = {"UCCEE", "UCCES", ""}; |
---|
| 63 | private static final String[] list24 = {"CK", "CG", "CQ", ""}; |
---|
| 64 | private static final String[] list25 = {"CI", "CE", "CY", ""}; |
---|
| 65 | // DMV: used by the orininal code which returned two phonetic code, but not the current code |
---|
| 66 | // private static final String[] list26 = { |
---|
| 67 | // "CIO", "CIE", "CIA", "" |
---|
| 68 | // }; |
---|
| 69 | private static final String[] list27 = {" C", " Q", " G", ""}; |
---|
| 70 | private static final String[] list28 = {"C", "K", "Q", ""}; |
---|
| 71 | private static final String[] list29 = {"CE", "CI", ""}; |
---|
| 72 | private static final String[] list30 = {"DG", ""}; |
---|
| 73 | private static final String[] list31 = {"I", "E", "Y", ""}; |
---|
| 74 | private static final String[] list32 = {"DT", "DD", ""}; |
---|
| 75 | private static final String[] list33 = {"B", "H", "D", ""}; |
---|
| 76 | private static final String[] list34 = {"B", "H", "D", ""}; |
---|
| 77 | private static final String[] list35 = {"B", "H", ""}; |
---|
| 78 | private static final String[] list36 = {"C", "G", "L", "R", "T", ""}; |
---|
| 79 | private static final String[] list37 = {"EY", ""}; |
---|
| 80 | private static final String[] list38 = {"LI", ""}; |
---|
| 81 | private static final String[] list39 = {"ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER", ""}; |
---|
| 82 | private static final String[] list40 = {"ER", ""}; |
---|
| 83 | private static final String[] list41 = {"DANGER", "RANGER", "MANGER", ""}; |
---|
| 84 | private static final String[] list42 = {"E", "I", ""}; |
---|
| 85 | private static final String[] list43 = {"RGY", "OGY", ""}; |
---|
| 86 | private static final String[] list44 = {"E", "I", "Y", ""}; |
---|
| 87 | private static final String[] list45 = {"AGGI", "OGGI", ""}; |
---|
| 88 | private static final String[] list46 = {"VAN ", "VON ", ""}; |
---|
| 89 | private static final String[] list47 = {"SCH", ""}; |
---|
| 90 | private static final String[] list48 = {"ET", ""}; |
---|
| 91 | |
---|
| 92 | // DMV: used by the orininal code which returned two phonetic code, but not the current code |
---|
| 93 | // private static final String[] list49 = { |
---|
| 94 | // "IER ", "" |
---|
| 95 | // }; |
---|
| 96 | private static final String[] list50 = {"JOSE", ""}; |
---|
| 97 | private static final String[] list51 = {"SAN ", ""}; |
---|
| 98 | private static final String[] list52 = {"SAN ", ""}; |
---|
| 99 | private static final String[] list53 = {"JOSE", ""}; |
---|
| 100 | private static final String[] list54 = {"L", "T", "K", "S", "N", "M", "B", "Z", ""}; |
---|
| 101 | private static final String[] list55 = {"S", "K", "L", ""}; |
---|
| 102 | private static final String[] list56 = {"ILLO", "ILLA", "ALLE", ""}; |
---|
| 103 | private static final String[] list57 = {"AS", "OS", ""}; |
---|
| 104 | private static final String[] list58 = {"A", "O", ""}; |
---|
| 105 | private static final String[] list59 = {"ALLE", ""}; |
---|
| 106 | private static final String[] list60 = {"UMB", ""}; |
---|
| 107 | private static final String[] list61 = {"ER", ""}; |
---|
| 108 | private static final String[] list62 = {"P", "B", ""}; |
---|
| 109 | private static final String[] list63 = {"IE", ""}; |
---|
| 110 | private static final String[] list64 = {"ME", "MA", ""}; |
---|
| 111 | private static final String[] list65 = {"ISL", "YSL", ""}; |
---|
| 112 | private static final String[] list66 = {"SUGAR", ""}; |
---|
| 113 | private static final String[] list67 = {"SH", ""}; |
---|
| 114 | private static final String[] list68 = {"HEIM", "HOEK", "HOLM", "HOLZ", ""}; |
---|
| 115 | private static final String[] list69 = {"SIO", "SIA", ""}; |
---|
| 116 | private static final String[] list70 = {"SIAN", ""}; |
---|
| 117 | private static final String[] list71 = {"M", "N", "L", "W", ""}; |
---|
| 118 | private static final String[] list72 = {"Z", ""}; |
---|
| 119 | private static final String[] list73 = {"Z", ""}; |
---|
| 120 | private static final String[] list74 = {"SC", ""}; |
---|
| 121 | private static final String[] list75 = {"OO", "ER", "EN", "UY", "ED", "EM", ""}; |
---|
| 122 | private static final String[] list76 = {"ER", "EN", ""}; |
---|
| 123 | private static final String[] list77 = {"I", "E", "Y", ""}; |
---|
| 124 | private static final String[] list78 = {"AI", "OI", ""}; |
---|
| 125 | private static final String[] list79 = {"S", "Z", ""}; |
---|
| 126 | private static final String[] list80 = {"TION", ""}; |
---|
| 127 | private static final String[] list81 = {"TIA", "TCH", ""}; |
---|
| 128 | private static final String[] list82 = {"TH", ""}; |
---|
| 129 | private static final String[] list83 = {"TTH", ""}; |
---|
| 130 | private static final String[] list84 = {"OM", "AM", ""}; |
---|
| 131 | private static final String[] list85 = {"VAN ", "VON ", ""}; |
---|
| 132 | private static final String[] list86 = {"SCH", ""}; |
---|
| 133 | private static final String[] list87 = {"T", "D", ""}; |
---|
| 134 | private static final String[] list88 = {"WR", ""}; |
---|
| 135 | private static final String[] list89 = {"WH", ""}; |
---|
| 136 | private static final String[] list90 = {"EWSKI", "EWSKY", "OWSKI", "OWSKY", ""}; |
---|
| 137 | private static final String[] list91 = {"SCH", ""}; |
---|
| 138 | private static final String[] list92 = {"WICZ", "WITZ", ""}; |
---|
| 139 | private static final String[] list93 = {"IAU", "EAU", ""}; |
---|
| 140 | private static final String[] list94 = {"AU", "OU", ""}; |
---|
| 141 | private static final String[] list95 = {"C", "X", ""}; |
---|
| 142 | |
---|
| 143 | // DMV: used by the orininal code which returned two phonetic code, but not the current code |
---|
| 144 | // private static final String[] list96 = { |
---|
| 145 | // "ZO", "ZI", "ZA", "" |
---|
| 146 | // }; |
---|
| 147 | |
---|
| 148 | /** |
---|
| 149 | * put your documentation comment here |
---|
| 150 | * @return |
---|
| 151 | */ |
---|
| 152 | private final static boolean SlavoGermanic(String in) { |
---|
| 153 | if ((in.indexOf("W") > -1) || (in.indexOf("K") > -1) || (in.indexOf("CZ") > -1) || (in.indexOf("WITZ") > -1)) |
---|
| 154 | return true; |
---|
| 155 | return false; |
---|
| 156 | } |
---|
| 157 | |
---|
| 158 | /** |
---|
| 159 | * put your documentation comment here |
---|
| 160 | * @param main |
---|
| 161 | */ |
---|
| 162 | private final static void MetaphAdd(StringBuffer primary, String main) { |
---|
| 163 | if (main != null) { |
---|
| 164 | primary.append(main); |
---|
| 165 | } |
---|
| 166 | } |
---|
| 167 | |
---|
| 168 | private final static void MetaphAdd(StringBuffer primary, char main) { |
---|
| 169 | primary.append(main); |
---|
| 170 | } |
---|
| 171 | |
---|
| 172 | /** |
---|
| 173 | * put your documentation comment here |
---|
| 174 | * @param at |
---|
| 175 | * @return |
---|
| 176 | */ |
---|
| 177 | private final static boolean isVowel(String in, int at, int length) { |
---|
| 178 | if ((at < 0) || (at >= length)) |
---|
| 179 | return false; |
---|
| 180 | char it = in.charAt(at); |
---|
| 181 | if ((it == 'A') || (it == 'E') || (it == 'I') || (it == 'O') || (it == 'U') || (it == 'Y')) |
---|
| 182 | return true; |
---|
| 183 | return false; |
---|
| 184 | } |
---|
| 185 | |
---|
| 186 | /** |
---|
| 187 | * put your documentation comment here |
---|
| 188 | * @param string |
---|
| 189 | * @param start |
---|
| 190 | * @param length |
---|
| 191 | * @param list |
---|
| 192 | * @return |
---|
| 193 | */ |
---|
| 194 | private final static boolean stringAt(String string, int start, int length, String[] list) { |
---|
| 195 | if ((start < 0) || (start >= string.length()) || list.length == 0) |
---|
| 196 | return false; |
---|
| 197 | String substr = string.substring(start, start + length); |
---|
| 198 | for (int i = 0; i < list.length; i++) { |
---|
| 199 | if (list[i].equals(substr)) |
---|
| 200 | return true; |
---|
| 201 | } |
---|
| 202 | return false; |
---|
| 203 | } |
---|
| 204 | |
---|
| 205 | /** |
---|
| 206 | * Take the given word, and return the best phonetic hash for it. |
---|
| 207 | * Vowels are minimized as much as possible, and consenants |
---|
| 208 | * that have similiar sounds are converted to the same consenant |
---|
| 209 | * for example, 'v' and 'f' are both converted to 'f' |
---|
| 210 | */ |
---|
| 211 | public final String transform(String word) { |
---|
| 212 | StringBuffer primary = new StringBuffer(word.length() + 5); |
---|
| 213 | String in = word.toUpperCase() + " "; |
---|
| 214 | int current = 0; |
---|
| 215 | int length = in.length(); |
---|
| 216 | if (length < 1) |
---|
| 217 | return ""; |
---|
| 218 | int last = length - 1; |
---|
| 219 | boolean isSlavoGermaic = SlavoGermanic(in); |
---|
| 220 | if (stringAt(in, 0, 2, myList)) |
---|
| 221 | current += 1; |
---|
| 222 | if (in.charAt(0) == 'X') { |
---|
| 223 | MetaphAdd(primary, 'S'); |
---|
| 224 | current += 1; |
---|
| 225 | } |
---|
| 226 | while (current < length) { |
---|
| 227 | switch (in.charAt(current)) { |
---|
| 228 | case 'A': |
---|
| 229 | case 'E': |
---|
| 230 | case 'I': |
---|
| 231 | case 'O': |
---|
| 232 | case 'U': |
---|
| 233 | case 'Y': |
---|
| 234 | if (current == 0) |
---|
| 235 | MetaphAdd(primary, 'A'); |
---|
| 236 | current += 1; |
---|
| 237 | break; |
---|
| 238 | case 'B': |
---|
| 239 | MetaphAdd(primary, 'P'); |
---|
| 240 | if (in.charAt(current + 1) == 'B') |
---|
| 241 | current += 2; |
---|
| 242 | else |
---|
| 243 | current += 1; |
---|
| 244 | break; |
---|
| 245 | case '\u00C7': |
---|
| 246 | MetaphAdd(primary, 'S'); |
---|
| 247 | current += 1; |
---|
| 248 | break; |
---|
| 249 | case 'C': |
---|
| 250 | if ((current > 1) && !isVowel(in, current - 2, length) && stringAt(in, (current - 1), 3, list1) && (in.charAt(current + 2) != 'I') && (in.charAt(current + 2) != 'E') || stringAt(in, (current - 2), 6, list2)) { |
---|
| 251 | MetaphAdd(primary, 'K'); |
---|
| 252 | current += 2; |
---|
| 253 | break; |
---|
| 254 | } |
---|
| 255 | if ((current == 0) && stringAt(in, current, 6, list3)) { |
---|
| 256 | MetaphAdd(primary, 'S'); |
---|
| 257 | current += 2; |
---|
| 258 | break; |
---|
| 259 | } |
---|
| 260 | if (stringAt(in, current, 4, list4)) { |
---|
| 261 | MetaphAdd(primary, 'K'); |
---|
| 262 | current += 2; |
---|
| 263 | break; |
---|
| 264 | } |
---|
| 265 | if (stringAt(in, current, 2, list5)) { |
---|
| 266 | if ((current > 0) && stringAt(in, current, 4, list6)) { |
---|
| 267 | MetaphAdd(primary, 'K'); |
---|
| 268 | current += 2; |
---|
| 269 | break; |
---|
| 270 | } |
---|
| 271 | if ((current == 0) && stringAt(in, (current + 1), 5, list7) || stringAt(in, current + 1, 3, list8) && !stringAt(in, 0, 5, list9)) { |
---|
| 272 | MetaphAdd(primary, 'K'); |
---|
| 273 | current += 2; |
---|
| 274 | break; |
---|
| 275 | } |
---|
| 276 | if (stringAt(in, 0, 4, list10) || stringAt(in, 0, 3, list11) || stringAt(in, current - 2, 6, list12) || stringAt(in, current + 2, 1, list13) || (stringAt(in, current - 1, 1, list14) || (current == 0)) && stringAt(in, current + 2, 1, list15)) { |
---|
| 277 | MetaphAdd(primary, 'K'); |
---|
| 278 | } else { |
---|
| 279 | if (current > 0) { |
---|
| 280 | if (stringAt(in, 0, 2, list16)) |
---|
| 281 | MetaphAdd(primary, 'K'); |
---|
| 282 | else |
---|
| 283 | MetaphAdd(primary, 'X'); |
---|
| 284 | } else { |
---|
| 285 | MetaphAdd(primary, 'X'); |
---|
| 286 | } |
---|
| 287 | } |
---|
| 288 | current += 2; |
---|
| 289 | break; |
---|
| 290 | } |
---|
| 291 | if (stringAt(in, current, 2, list17) && !stringAt(in, current, 4, list18)) { |
---|
| 292 | MetaphAdd(primary, 'S'); |
---|
| 293 | current += 2; |
---|
| 294 | break; |
---|
| 295 | } |
---|
| 296 | if (stringAt(in, current, 2, list19)) { |
---|
| 297 | MetaphAdd(primary, 'X'); |
---|
| 298 | current += 2; |
---|
| 299 | break; |
---|
| 300 | } |
---|
| 301 | if (stringAt(in, current, 2, list20) && !((current == 1) && in.charAt(0) == 'M')) { |
---|
| 302 | if (stringAt(in, current + 2, 1, list21) && !stringAt(in, current + 2, 2, list22)) { |
---|
| 303 | if (((current == 1) && (in.charAt(current - 1) == 'A')) || stringAt(in, (current - 1), 5, list23)) |
---|
| 304 | MetaphAdd(primary, "KS"); |
---|
| 305 | else |
---|
| 306 | MetaphAdd(primary, 'X'); |
---|
| 307 | current += 3; |
---|
| 308 | break; |
---|
| 309 | } else { |
---|
| 310 | MetaphAdd(primary, 'K'); |
---|
| 311 | current += 2; |
---|
| 312 | break; |
---|
| 313 | } |
---|
| 314 | } |
---|
| 315 | if (stringAt(in, current, 2, list24)) { |
---|
| 316 | MetaphAdd(primary, 'K'); |
---|
| 317 | current += 2; |
---|
| 318 | break; |
---|
| 319 | } else if (stringAt(in, current, 2, list25)) { |
---|
| 320 | MetaphAdd(primary, 'S'); |
---|
| 321 | current += 2; |
---|
| 322 | break; |
---|
| 323 | } |
---|
| 324 | |
---|
| 325 | MetaphAdd(primary, 'K'); |
---|
| 326 | if (stringAt(in, current + 1, 2, list27)) |
---|
| 327 | current += 3; |
---|
| 328 | else if (stringAt(in, current + 1, 1, list28) && !stringAt(in, current + 1, 2, list29)) |
---|
| 329 | current += 2; |
---|
| 330 | else |
---|
| 331 | current += 1; |
---|
| 332 | break; |
---|
| 333 | case 'D': |
---|
| 334 | if (stringAt(in, current, 2, list30)) { |
---|
| 335 | if (stringAt(in, current + 2, 1, list31)) { |
---|
| 336 | MetaphAdd(primary, 'J'); |
---|
| 337 | current += 3; |
---|
| 338 | break; |
---|
| 339 | } else { |
---|
| 340 | MetaphAdd(primary, "TK"); |
---|
| 341 | current += 2; |
---|
| 342 | break; |
---|
| 343 | } |
---|
| 344 | } |
---|
| 345 | MetaphAdd(primary, 'T'); |
---|
| 346 | if (stringAt(in, current, 2, list32)) { |
---|
| 347 | current += 2; |
---|
| 348 | } else { |
---|
| 349 | current += 1; |
---|
| 350 | } |
---|
| 351 | break; |
---|
| 352 | case 'F': |
---|
| 353 | if (in.charAt(current + 1) == 'F') |
---|
| 354 | current += 2; |
---|
| 355 | else |
---|
| 356 | current += 1; |
---|
| 357 | MetaphAdd(primary, 'F'); |
---|
| 358 | break; |
---|
| 359 | case 'G': |
---|
| 360 | if (in.charAt(current + 1) == 'H') { |
---|
| 361 | if ((current > 0) && !isVowel(in, current - 1, length)) { |
---|
| 362 | MetaphAdd(primary, 'K'); |
---|
| 363 | current += 2; |
---|
| 364 | break; |
---|
| 365 | } |
---|
| 366 | if (current < 3) { |
---|
| 367 | if (current == 0) { |
---|
| 368 | if (in.charAt(current + 2) == 'I') |
---|
| 369 | MetaphAdd(primary, 'J'); |
---|
| 370 | else |
---|
| 371 | MetaphAdd(primary, 'K'); |
---|
| 372 | current += 2; |
---|
| 373 | break; |
---|
| 374 | } |
---|
| 375 | } |
---|
| 376 | if ((current > 1) && stringAt(in, current - 2, 1, list33) || ((current > 2) && stringAt(in, current - 3, 1, list34)) || ((current > 3) && stringAt(in, current - 4, 1, list35))) { |
---|
| 377 | current += 2; |
---|
| 378 | break; |
---|
| 379 | } else { |
---|
| 380 | if ((current > 2) && (in.charAt(current - 1) == 'U') && stringAt(in, current - 3, 1, list36)) { |
---|
| 381 | MetaphAdd(primary, 'F'); |
---|
| 382 | } else { |
---|
| 383 | if ((current > 0) && (in.charAt(current - 1) != 'I')) |
---|
| 384 | MetaphAdd(primary, 'K'); |
---|
| 385 | } |
---|
| 386 | current += 2; |
---|
| 387 | break; |
---|
| 388 | } |
---|
| 389 | } |
---|
| 390 | if (in.charAt(current + 1) == 'N') { |
---|
| 391 | if ((current == 1) && isVowel(in, 0, length) && !isSlavoGermaic) { |
---|
| 392 | MetaphAdd(primary, "KN"); |
---|
| 393 | } else { |
---|
| 394 | if (!stringAt(in, current + 2, 2, list37) && (in.charAt(current + 1) != 'Y') && !isSlavoGermaic) { |
---|
| 395 | MetaphAdd(primary, "N"); |
---|
| 396 | } else { |
---|
| 397 | MetaphAdd(primary, "KN"); |
---|
| 398 | } |
---|
| 399 | } |
---|
| 400 | current += 2; |
---|
| 401 | break; |
---|
| 402 | } |
---|
| 403 | if (stringAt(in, current + 1, 2, list38) && !isSlavoGermaic) { |
---|
| 404 | MetaphAdd(primary, "KL"); |
---|
| 405 | current += 2; |
---|
| 406 | break; |
---|
| 407 | } |
---|
| 408 | if ((current == 0) && ((in.charAt(current + 1) == 'Y') || stringAt(in, current + 1, 2, list39))) { |
---|
| 409 | MetaphAdd(primary, 'K'); |
---|
| 410 | current += 2; |
---|
| 411 | break; |
---|
| 412 | } |
---|
| 413 | if ((stringAt(in, current + 1, 2, list40) || (in.charAt(current + 1) == 'Y')) && !stringAt(in, 0, 6, list41) && !stringAt(in, current - 1, 1, list42) && !stringAt(in, current - 1, 3, list43)) { |
---|
| 414 | MetaphAdd(primary, 'K'); |
---|
| 415 | current += 2; |
---|
| 416 | break; |
---|
| 417 | } |
---|
| 418 | if (stringAt(in, current + 1, 1, list44) || stringAt(in, current - 1, 4, list45)) { |
---|
| 419 | if (stringAt(in, 0, 4, list46) || stringAt(in, 0, 3, list47) || stringAt(in, current + 1, 2, list48)) { |
---|
| 420 | MetaphAdd(primary, 'K'); |
---|
| 421 | } else { |
---|
| 422 | MetaphAdd(primary, 'J'); |
---|
| 423 | } |
---|
| 424 | current += 2; |
---|
| 425 | break; |
---|
| 426 | } |
---|
| 427 | if (in.charAt(current + 1) == 'G') |
---|
| 428 | current += 2; |
---|
| 429 | else |
---|
| 430 | current += 1; |
---|
| 431 | MetaphAdd(primary, 'K'); |
---|
| 432 | break; |
---|
| 433 | case 'H': |
---|
| 434 | if (((current == 0) || isVowel(in, current - 1, length)) && isVowel(in, current + 1, length)) { |
---|
| 435 | MetaphAdd(primary, 'H'); |
---|
| 436 | current += 2; |
---|
| 437 | } else { |
---|
| 438 | current += 1; |
---|
| 439 | } |
---|
| 440 | break; |
---|
| 441 | case 'J': |
---|
| 442 | if (stringAt(in, current, 4, list50) || stringAt(in, 0, 4, list51)) { |
---|
| 443 | if ((current == 0) && (in.charAt(current + 4) == ' ') || stringAt(in, 0, 4, list52)) { |
---|
| 444 | MetaphAdd(primary, 'H'); |
---|
| 445 | } else { |
---|
| 446 | MetaphAdd(primary, 'J'); |
---|
| 447 | } |
---|
| 448 | current += 1; |
---|
| 449 | break; |
---|
| 450 | } |
---|
| 451 | if ((current == 0) && !stringAt(in, current, 4, list53)) { |
---|
| 452 | MetaphAdd(primary, 'J'); |
---|
| 453 | } else { |
---|
| 454 | if (isVowel(in, current - 1, length) && !isSlavoGermaic && ((in.charAt(current + 1) == 'A') || in.charAt(current + 1) == 'O')) { |
---|
| 455 | MetaphAdd(primary, 'J'); |
---|
| 456 | } else { |
---|
| 457 | if (current == last) { |
---|
| 458 | MetaphAdd(primary, 'J'); |
---|
| 459 | } else { |
---|
| 460 | if (!stringAt(in, current + 1, 1, list54) && !stringAt(in, current - 1, 1, list55)) { |
---|
| 461 | MetaphAdd(primary, 'J'); |
---|
| 462 | } |
---|
| 463 | } |
---|
| 464 | } |
---|
| 465 | } |
---|
| 466 | if (in.charAt(current + 1) == 'J') |
---|
| 467 | current += 2; |
---|
| 468 | else |
---|
| 469 | current += 1; |
---|
| 470 | break; |
---|
| 471 | case 'K': |
---|
| 472 | if (in.charAt(current + 1) == 'K') |
---|
| 473 | current += 2; |
---|
| 474 | else |
---|
| 475 | current += 1; |
---|
| 476 | MetaphAdd(primary, 'K'); |
---|
| 477 | break; |
---|
| 478 | case 'L': |
---|
| 479 | if (in.charAt(current + 1) == 'L') { |
---|
| 480 | if (((current == (length - 3)) && stringAt(in, current - 1, 4, list56)) || ((stringAt(in, last - 1, 2, list57) || stringAt(in, last, 1, list58)) && stringAt(in, current - 1, 4, list59))) { |
---|
| 481 | MetaphAdd(primary, 'L'); |
---|
| 482 | current += 2; |
---|
| 483 | break; |
---|
| 484 | } |
---|
| 485 | current += 2; |
---|
| 486 | } else |
---|
| 487 | current += 1; |
---|
| 488 | MetaphAdd(primary, 'L'); |
---|
| 489 | break; |
---|
| 490 | case 'M': |
---|
| 491 | if ((stringAt(in, current - 1, 3, list60) && (((current + 1) == last) || stringAt(in, current + 2, 2, list61))) || (in.charAt(current + 1) == 'M')) |
---|
| 492 | current += 2; |
---|
| 493 | else |
---|
| 494 | current += 1; |
---|
| 495 | MetaphAdd(primary, 'M'); |
---|
| 496 | break; |
---|
| 497 | case 'N': |
---|
| 498 | if (in.charAt(current + 1) == 'N') |
---|
| 499 | current += 2; |
---|
| 500 | else |
---|
| 501 | current += 1; |
---|
| 502 | MetaphAdd(primary, 'N'); |
---|
| 503 | break; |
---|
| 504 | case '\u00D1': |
---|
| 505 | current += 1; |
---|
| 506 | MetaphAdd(primary, 'N'); |
---|
| 507 | break; |
---|
| 508 | case 'P': |
---|
| 509 | if (in.charAt(current + 1) == 'N') { |
---|
| 510 | MetaphAdd(primary, 'F'); |
---|
| 511 | current += 2; |
---|
| 512 | break; |
---|
| 513 | } |
---|
| 514 | if (stringAt(in, current + 1, 1, list62)) |
---|
| 515 | current += 2; |
---|
| 516 | else |
---|
| 517 | current += 1; |
---|
| 518 | MetaphAdd(primary, 'P'); |
---|
| 519 | break; |
---|
| 520 | case 'Q': |
---|
| 521 | if (in.charAt(current + 1) == 'Q') |
---|
| 522 | current += 2; |
---|
| 523 | else |
---|
| 524 | current += 1; |
---|
| 525 | MetaphAdd(primary, 'K'); |
---|
| 526 | break; |
---|
| 527 | case 'R': |
---|
| 528 | if ((current == last) && !isSlavoGermaic && stringAt(in, current - 2, 2, list63) && !stringAt(in, current - 4, 2, list64)) { |
---|
| 529 | // MetaphAdd(primary, ""); |
---|
| 530 | } else |
---|
| 531 | MetaphAdd(primary, 'R'); |
---|
| 532 | if (in.charAt(current + 1) == 'R') |
---|
| 533 | current += 2; |
---|
| 534 | else |
---|
| 535 | current += 1; |
---|
| 536 | break; |
---|
| 537 | case 'S': |
---|
| 538 | if (stringAt(in, current - 1, 3, list65)) { |
---|
| 539 | current += 1; |
---|
| 540 | break; |
---|
| 541 | } |
---|
| 542 | if ((current == 0) && stringAt(in, current, 5, list66)) { |
---|
| 543 | MetaphAdd(primary, 'X'); |
---|
| 544 | current += 1; |
---|
| 545 | break; |
---|
| 546 | } |
---|
| 547 | if (stringAt(in, current, 2, list67)) { |
---|
| 548 | if (stringAt(in, current + 1, 4, list68)) |
---|
| 549 | MetaphAdd(primary, 'S'); |
---|
| 550 | else |
---|
| 551 | MetaphAdd(primary, 'X'); |
---|
| 552 | current += 2; |
---|
| 553 | break; |
---|
| 554 | } |
---|
| 555 | if (stringAt(in, current, 3, list69) || stringAt(in, current, 4, list70)) { |
---|
| 556 | MetaphAdd(primary, 'S'); |
---|
| 557 | current += 3; |
---|
| 558 | break; |
---|
| 559 | } |
---|
| 560 | if (((current == 0) && stringAt(in, current + 1, 1, list71)) || stringAt(in, current + 1, 1, list72)) { |
---|
| 561 | MetaphAdd(primary, 'S'); |
---|
| 562 | if (stringAt(in, current + 1, 1, list73)) |
---|
| 563 | current += 2; |
---|
| 564 | else |
---|
| 565 | current += 1; |
---|
| 566 | break; |
---|
| 567 | } |
---|
| 568 | if (stringAt(in, current, 2, list74)) { |
---|
| 569 | if (in.charAt(current + 2) == 'H') |
---|
| 570 | if (stringAt(in, current + 3, 2, list75)) { |
---|
| 571 | if (stringAt(in, current + 3, 2, list76)) { |
---|
| 572 | MetaphAdd(primary, "X"); |
---|
| 573 | } else { |
---|
| 574 | MetaphAdd(primary, "SK"); |
---|
| 575 | } |
---|
| 576 | current += 3; |
---|
| 577 | break; |
---|
| 578 | } else { |
---|
| 579 | MetaphAdd(primary, 'X'); |
---|
| 580 | current += 3; |
---|
| 581 | break; |
---|
| 582 | } |
---|
| 583 | if (stringAt(in, current + 2, 1, list77)) { |
---|
| 584 | MetaphAdd(primary, 'S'); |
---|
| 585 | current += 3; |
---|
| 586 | break; |
---|
| 587 | } |
---|
| 588 | MetaphAdd(primary, "SK"); |
---|
| 589 | current += 3; |
---|
| 590 | break; |
---|
| 591 | } |
---|
| 592 | if ((current == last) && stringAt(in, current - 2, 2, list78)) { |
---|
| 593 | //MetaphAdd(primary, ""); |
---|
| 594 | } else |
---|
| 595 | MetaphAdd(primary, 'S'); |
---|
| 596 | if (stringAt(in, current + 1, 1, list79)) |
---|
| 597 | current += 2; |
---|
| 598 | else |
---|
| 599 | current += 1; |
---|
| 600 | break; |
---|
| 601 | case 'T': |
---|
| 602 | if (stringAt(in, current, 4, list80)) { |
---|
| 603 | MetaphAdd(primary, 'X'); |
---|
| 604 | current += 3; |
---|
| 605 | break; |
---|
| 606 | } |
---|
| 607 | if (stringAt(in, current, 3, list81)) { |
---|
| 608 | MetaphAdd(primary, 'X'); |
---|
| 609 | current += 3; |
---|
| 610 | break; |
---|
| 611 | } |
---|
| 612 | if (stringAt(in, current, 2, list82) || stringAt(in, current, 3, list83)) { |
---|
| 613 | if (stringAt(in, (current + 2), 2, list84) || stringAt(in, 0, 4, list85) || stringAt(in, 0, 3, list86)) { |
---|
| 614 | MetaphAdd(primary, 'T'); |
---|
| 615 | } else { |
---|
| 616 | MetaphAdd(primary, '0'); |
---|
| 617 | } |
---|
| 618 | current += 2; |
---|
| 619 | break; |
---|
| 620 | } |
---|
| 621 | if (stringAt(in, current + 1, 1, list87)) { |
---|
| 622 | current += 2; |
---|
| 623 | } else |
---|
| 624 | current += 1; |
---|
| 625 | MetaphAdd(primary, 'T'); |
---|
| 626 | break; |
---|
| 627 | case 'V': |
---|
| 628 | if (in.charAt(current + 1) == 'V') |
---|
| 629 | current += 2; |
---|
| 630 | else |
---|
| 631 | current += 1; |
---|
| 632 | MetaphAdd(primary, 'F'); |
---|
| 633 | break; |
---|
| 634 | case 'W': |
---|
| 635 | if (stringAt(in, current, 2, list88)) { |
---|
| 636 | MetaphAdd(primary, 'R'); |
---|
| 637 | current += 2; |
---|
| 638 | break; |
---|
| 639 | } |
---|
| 640 | if ((current == 0) && (isVowel(in, current + 1, length) || stringAt(in, current, 2, list89))) { |
---|
| 641 | MetaphAdd(primary, 'A'); |
---|
| 642 | } |
---|
| 643 | if (((current == last) && isVowel(in, current - 1, length)) || stringAt(in, current - 1, 5, list90) || stringAt(in, 0, 3, list91)) { |
---|
| 644 | MetaphAdd(primary, 'F'); |
---|
| 645 | current += 1; |
---|
| 646 | break; |
---|
| 647 | } |
---|
| 648 | if (stringAt(in, current, 4, list92)) { |
---|
| 649 | MetaphAdd(primary, "TS"); |
---|
| 650 | current += 4; |
---|
| 651 | break; |
---|
| 652 | } |
---|
| 653 | current += 1; |
---|
| 654 | break; |
---|
| 655 | case 'X': |
---|
| 656 | if (!((current == last) && (stringAt(in, current - 3, 3, list93) || stringAt(in, current - 2, 2, list94)))) |
---|
| 657 | MetaphAdd(primary, "KS"); |
---|
| 658 | if (stringAt(in, current + 1, 1, list95)) |
---|
| 659 | current += 2; |
---|
| 660 | else |
---|
| 661 | current += 1; |
---|
| 662 | break; |
---|
| 663 | case 'Z': |
---|
| 664 | if (in.charAt(current + 1) == 'H') { |
---|
| 665 | MetaphAdd(primary, 'J'); |
---|
| 666 | current += 2; |
---|
| 667 | break; |
---|
| 668 | } else { |
---|
| 669 | MetaphAdd(primary, 'S'); |
---|
| 670 | } |
---|
| 671 | if (in.charAt(current + 1) == 'Z') |
---|
| 672 | current += 2; |
---|
| 673 | else |
---|
| 674 | current += 1; |
---|
| 675 | break; |
---|
| 676 | default: |
---|
| 677 | current += 1; |
---|
| 678 | } |
---|
| 679 | } |
---|
| 680 | return primary.toString(); |
---|
| 681 | } |
---|
| 682 | |
---|
| 683 | /** |
---|
| 684 | * @see com.swabunga.spell.engine.Transformator#getReplaceList() |
---|
| 685 | */ |
---|
| 686 | public char[] getReplaceList() { |
---|
| 687 | return replaceList; |
---|
| 688 | } |
---|
| 689 | } |
---|
| 690 | |
---|
| 691 | |
---|
| 692 | |
---|