[1001] | 1 | package com.swabunga.spell.event; |
---|
| 2 | |
---|
| 3 | import com.swabunga.spell.engine.Configuration; |
---|
| 4 | import com.swabunga.spell.engine.SpellDictionary; |
---|
| 5 | import com.swabunga.spell.engine.SpellDictionaryHashMap; |
---|
| 6 | import com.swabunga.spell.engine.Word; |
---|
| 7 | import com.swabunga.util.VectorUtility; |
---|
| 8 | |
---|
| 9 | import java.io.IOException; |
---|
| 10 | import java.util.Enumeration; |
---|
| 11 | import java.util.Hashtable; |
---|
| 12 | import java.util.List; |
---|
| 13 | import java.util.Vector; |
---|
| 14 | |
---|
| 15 | |
---|
| 16 | /** |
---|
| 17 | * This is the main class for spell checking (using the new event based spell |
---|
| 18 | * checking). |
---|
| 19 | * |
---|
| 20 | * @author Jason Height (jheight@chariot.net.au) |
---|
| 21 | * 19 June 2002 |
---|
| 22 | */ |
---|
| 23 | public class SpellChecker { |
---|
| 24 | /** Flag indicating that the Spell Check completed without any errors present*/ |
---|
| 25 | public static final int SPELLCHECK_OK = -1; |
---|
| 26 | /** Flag indicating that the Spell Check completed due to user cancellation*/ |
---|
| 27 | public static final int SPELLCHECK_CANCEL = -2; |
---|
| 28 | |
---|
| 29 | private Vector eventListeners = new Vector(); |
---|
| 30 | private Vector dictionaries = new Vector(); |
---|
| 31 | // private SpellDictionary userdictionary; |
---|
| 32 | |
---|
| 33 | private Configuration config = Configuration.getConfiguration(); |
---|
| 34 | |
---|
| 35 | /**This variable holds all of the words that are to be always ignored */ |
---|
| 36 | // private Vector ignoredWords = new Vector(); |
---|
| 37 | // private Hashtable autoReplaceWords = new Hashtable(); |
---|
| 38 | |
---|
| 39 | /** |
---|
| 40 | * Constructs the SpellChecker. |
---|
| 41 | */ |
---|
| 42 | public SpellChecker() { |
---|
| 43 | // try { |
---|
| 44 | // userdictionary = new SpellDictionaryHashMap(); |
---|
| 45 | // } catch (IOException e) { |
---|
| 46 | // throw new RuntimeException("this exception should never happen because we are using null phonetic file"); |
---|
| 47 | // } |
---|
| 48 | } |
---|
| 49 | |
---|
/**
 * Creates a spell checker that starts with a single dictionary.
 * The threshold stored in the configuration is left untouched.
 *
 * @param dictionary the first dictionary to consult; passed on to
 *                   addDictionary, which rejects null
 */
public SpellChecker(SpellDictionary dictionary) {
    this();
    this.addDictionary(dictionary);
}
---|
| 59 | |
---|
| 60 | |
---|
/**
 * Creates a spell checker with a single dictionary and stores the given
 * threshold in the configuration under Configuration.SPELL_THRESHOLD.
 * <p>
 * NOTE(review): the configuration object comes from
 * Configuration.getConfiguration(); if that instance is shared, the
 * threshold set here is visible to other users of it - confirm.
 *
 * @param dictionary the first dictionary to consult; must not be null
 * @param threshold  the value written to the SPELL_THRESHOLD setting
 */
public SpellChecker(SpellDictionary dictionary, int threshold) {
    this(dictionary);
    this.config.setInteger(Configuration.SPELL_THRESHOLD, threshold);
}
---|
| 71 | |
---|
| 72 | public void addDictionary(SpellDictionary dictionary) { |
---|
| 73 | if (dictionary == null) { |
---|
| 74 | throw new IllegalArgumentException("dictionary must be non-null"); |
---|
| 75 | } |
---|
| 76 | this.dictionaries.addElement(dictionary); |
---|
| 77 | } |
---|
| 78 | |
---|
| 79 | /** |
---|
| 80 | *Adds a SpellCheckListener |
---|
| 81 | * |
---|
| 82 | * @param listener The feature to be added to the SpellCheckListener attribute |
---|
| 83 | */ |
---|
| 84 | public void addSpellCheckListener(SpellCheckListener listener) { |
---|
| 85 | eventListeners.addElement(listener); |
---|
| 86 | } |
---|
| 87 | |
---|
| 88 | |
---|
| 89 | /** |
---|
| 90 | *Removes a SpellCheckListener |
---|
| 91 | * |
---|
| 92 | * @param listener Description of the Parameter |
---|
| 93 | */ |
---|
| 94 | public void removeSpellCheckListener(SpellCheckListener listener) { |
---|
| 95 | eventListeners.removeElement(listener); |
---|
| 96 | } |
---|
| 97 | |
---|
| 98 | /* |
---|
| 99 | * Set user dictionary (used when a word is added) |
---|
| 100 | * |
---|
| 101 | public void setUserDictionary(SpellDictionary dictionary) { |
---|
| 102 | userdictionary = dictionary; |
---|
| 103 | } |
---|
| 104 | |
---|
| 105 | /* |
---|
| 106 | * |
---|
| 107 | * @return Current Configuration |
---|
| 108 | * |
---|
| 109 | public Configuration getConfiguration() { |
---|
| 110 | return config; |
---|
| 111 | } |
---|
| 112 | |
---|
| 113 | /** |
---|
| 114 | * Fires off a spell check event to the listeners. |
---|
| 115 | * |
---|
| 116 | * @param event Description of the Parameter |
---|
| 117 | */ |
---|
| 118 | protected void fireSpellCheckEvent(SpellCheckEvent event) { |
---|
| 119 | for (int i = eventListeners.size() - 1; i >= 0; i--) { |
---|
| 120 | ((SpellCheckListener) eventListeners.elementAt(i)).spellingError(event); |
---|
| 121 | } |
---|
| 122 | } |
---|
| 123 | |
---|
| 124 | |
---|
| 125 | /* |
---|
| 126 | * This method clears the words that are currently being remembered as |
---|
| 127 | * Ignore All words and Replace All words. |
---|
| 128 | * |
---|
| 129 | public void reset() { |
---|
| 130 | ignoredWords = new Vector(); |
---|
| 131 | autoReplaceWords = new Hashtable(); |
---|
| 132 | } |
---|
| 133 | */ |
---|
| 134 | |
---|
| 135 | /** |
---|
| 136 | * Checks the text string. |
---|
| 137 | * <p> |
---|
| 138 | * Returns the corrected string. |
---|
| 139 | * |
---|
| 140 | * @param text Description of the Parameter |
---|
| 141 | * @return Description of the Return Value |
---|
| 142 | * @deprecated use checkSpelling(WordTokenizer) |
---|
| 143 | */ |
---|
| 144 | /* |
---|
| 145 | public String checkString(String text) { |
---|
| 146 | StringWordTokenizer tokens = new StringWordTokenizer(text); |
---|
| 147 | checkSpelling(tokens); |
---|
| 148 | return tokens.getContext(); |
---|
| 149 | } |
---|
| 150 | */ |
---|
| 151 | |
---|
| 152 | /* |
---|
| 153 | * Returns true iff this word contains a digit. |
---|
| 154 | * |
---|
| 155 | * @param word Description of the Parameter |
---|
| 156 | * @return The digitWord value |
---|
| 157 | */ |
---|
| 158 | private final static boolean isDigitWord(String word) { |
---|
| 159 | for (int i = word.length() - 1; i >= 0; i--) { |
---|
| 160 | if (Character.isDigit(word.charAt(i))) { |
---|
| 161 | return true; |
---|
| 162 | } |
---|
| 163 | } |
---|
| 164 | return false; |
---|
| 165 | } |
---|
| 166 | |
---|
| 167 | |
---|
| 168 | /** |
---|
| 169 | * Returns true iff this word looks like an internet address. |
---|
| 170 | * |
---|
| 171 | * One limitation is that this method cannot currently recognise email |
---|
| 172 | * addresses. Since the 'word' that is passed in may in fact contain |
---|
| 173 | * the rest of the document to be checked, it is not (yet!) a good |
---|
| 174 | * idea to scan for the @ character. |
---|
| 175 | * |
---|
| 176 | * @param word Description of the Parameter |
---|
| 177 | * @return The iNETWord value |
---|
| 178 | */ |
---|
| 179 | public final static boolean isINETWord(String word) { |
---|
| 180 | String lowerCaseWord = word.toLowerCase(); |
---|
| 181 | return lowerCaseWord.startsWith("http://") || |
---|
| 182 | lowerCaseWord.startsWith("www.") || |
---|
| 183 | lowerCaseWord.startsWith("ftp://") || |
---|
| 184 | lowerCaseWord.startsWith("https://") || |
---|
| 185 | lowerCaseWord.startsWith("ftps://"); |
---|
| 186 | } |
---|
| 187 | |
---|
| 188 | |
---|
| 189 | /** |
---|
| 190 | * Returns true iif this word contains all upper case characters |
---|
| 191 | * |
---|
| 192 | * @param word Description of the Parameter |
---|
| 193 | * @return The upperCaseWord value |
---|
| 194 | */ |
---|
| 195 | private final static boolean isUpperCaseWord(String word) { |
---|
| 196 | for (int i = word.length() - 1; i >= 0; i--) { |
---|
| 197 | if (Character.isLowerCase(word.charAt(i))) { |
---|
| 198 | return false; |
---|
| 199 | } |
---|
| 200 | } |
---|
| 201 | return true; |
---|
| 202 | } |
---|
| 203 | |
---|
| 204 | |
---|
| 205 | /** |
---|
| 206 | * Returns true iif this word contains mixed case characters |
---|
| 207 | * |
---|
| 208 | * @param word Description of the Parameter |
---|
| 209 | * @param startsSentence True if this word is at the start of a sentence |
---|
| 210 | * @return The mixedCaseWord value |
---|
| 211 | */ |
---|
| 212 | private final static boolean isMixedCaseWord(String word, boolean startsSentence) { |
---|
| 213 | int strLen = word.length(); |
---|
| 214 | boolean isUpper = Character.isUpperCase(word.charAt(0)); |
---|
| 215 | //Ignore the first character if this word starts the sentence and the first |
---|
| 216 | //character was upper cased, since this is normal behaviour |
---|
| 217 | if ((startsSentence) && isUpper && (strLen > 1)) |
---|
| 218 | isUpper = Character.isUpperCase(word.charAt(1)); |
---|
| 219 | if (isUpper) { |
---|
| 220 | for (int i = word.length() - 1; i > 0; i--) { |
---|
| 221 | if (Character.isLowerCase(word.charAt(i))) { |
---|
| 222 | return true; |
---|
| 223 | } |
---|
| 224 | } |
---|
| 225 | } else { |
---|
| 226 | for (int i = word.length() - 1; i > 0; i--) { |
---|
| 227 | if (Character.isUpperCase(word.charAt(i))) { |
---|
| 228 | return true; |
---|
| 229 | } |
---|
| 230 | } |
---|
| 231 | } |
---|
| 232 | return false; |
---|
| 233 | } |
---|
| 234 | |
---|
| 235 | |
---|
| 236 | /* |
---|
| 237 | * This method will fire the spell check event and then handle the event |
---|
| 238 | * action that has been selected by the user. |
---|
| 239 | * |
---|
| 240 | * @param tokenizer Description of the Parameter |
---|
| 241 | * @param event Description of the Parameter |
---|
| 242 | * @return Returns true if the event action is to cancel the current spell checking, false if the spell checking should continue |
---|
| 243 | */ |
---|
| 244 | protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) { |
---|
| 245 | fireSpellCheckEvent(event); |
---|
| 246 | /* |
---|
| 247 | String word = event.getInvalidWord(); |
---|
| 248 | //Work out what to do in response to the event. |
---|
| 249 | switch (event.getAction()) { |
---|
| 250 | case SpellCheckEvent.INITIAL: |
---|
| 251 | break; |
---|
| 252 | case SpellCheckEvent.IGNORE: |
---|
| 253 | break; |
---|
| 254 | case SpellCheckEvent.IGNOREALL: |
---|
| 255 | if (!ignoredWords.contains(word)) { |
---|
| 256 | ignoredWords.addElement(word); |
---|
| 257 | } |
---|
| 258 | break; |
---|
| 259 | case SpellCheckEvent.REPLACE: |
---|
| 260 | tokenizer.replaceWord(event.getReplaceWord()); |
---|
| 261 | break; |
---|
| 262 | case SpellCheckEvent.REPLACEALL: |
---|
| 263 | String replaceAllWord = event.getReplaceWord(); |
---|
| 264 | if (!autoReplaceWords.containsKey(word)) { |
---|
| 265 | autoReplaceWords.put(word, replaceAllWord); |
---|
| 266 | } |
---|
| 267 | tokenizer.replaceWord(replaceAllWord); |
---|
| 268 | break; |
---|
| 269 | case SpellCheckEvent.ADDTODICT: |
---|
| 270 | String addWord = event.getReplaceWord(); |
---|
| 271 | if (!addWord.equals(word)) |
---|
| 272 | tokenizer.replaceWord(addWord); |
---|
| 273 | userdictionary.addWord(addWord); |
---|
| 274 | break; |
---|
| 275 | case SpellCheckEvent.CANCEL: |
---|
| 276 | return true; |
---|
| 277 | default: |
---|
| 278 | throw new IllegalArgumentException("Unhandled case."); |
---|
| 279 | } |
---|
| 280 | */ |
---|
| 281 | return false; |
---|
| 282 | } |
---|
| 283 | |
---|
| 284 | |
---|
| 285 | |
---|
| 286 | private boolean isCorrect(String word) { |
---|
| 287 | // if (userdictionary.isCorrect(word)) return true; |
---|
| 288 | for (Enumeration e = dictionaries.elements(); e.hasMoreElements();) { |
---|
| 289 | SpellDictionary dictionary = (SpellDictionary) e.nextElement(); |
---|
| 290 | if (dictionary.isCorrect(word)) return true; |
---|
| 291 | } |
---|
| 292 | return false; |
---|
| 293 | } |
---|
| 294 | |
---|
| 295 | |
---|
| 296 | public List getSuggestions(String word, int threshold) { |
---|
| 297 | List suggestions = new Vector();//userdictionary.getSuggestions(word, threshold); |
---|
| 298 | for (Enumeration e = dictionaries.elements(); e.hasMoreElements();) { |
---|
| 299 | SpellDictionary dictionary = (SpellDictionary) e.nextElement(); |
---|
| 300 | VectorUtility.addAll(suggestions, dictionary.getSuggestions(word, threshold), false); |
---|
| 301 | } |
---|
| 302 | return suggestions; |
---|
| 303 | } |
---|
| 304 | |
---|
| 305 | |
---|
| 306 | /** |
---|
| 307 | * This method is called to check the spelling of the words that are returned |
---|
| 308 | * by the WordTokenizer. |
---|
| 309 | * <p>For each invalid word the action listeners will be informed with a new SpellCheckEvent</p> |
---|
| 310 | * |
---|
| 311 | * @param tokenizer Description of the Parameter |
---|
| 312 | * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of errors are those that |
---|
| 313 | * are found BEFORE any corrections are made. |
---|
| 314 | */ |
---|
| 315 | public final int checkSpelling(WordTokenizer tokenizer) { |
---|
| 316 | int errors = 0; |
---|
| 317 | boolean terminated = false; |
---|
| 318 | //Keep track of the previous word |
---|
| 319 | // String previousWord = null; |
---|
| 320 | while (tokenizer.hasMoreWords() && !terminated) { |
---|
| 321 | String word = tokenizer.nextWord(); |
---|
| 322 | //Check the spelling of the word |
---|
| 323 | if (!isCorrect(word)) { |
---|
| 324 | if ((config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE) && isMixedCaseWord(word, tokenizer.isNewSentence())) || |
---|
| 325 | (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word)) || |
---|
| 326 | (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word)) || |
---|
| 327 | (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES) && isINETWord(word))) { |
---|
| 328 | //Null event. Since we are ignoring this word due |
---|
| 329 | //to one of the above cases. |
---|
| 330 | } else { |
---|
| 331 | //We cant ignore this misspelt word |
---|
| 332 | //For this invalid word are we ignoring the misspelling? |
---|
| 333 | // if (!ignoredWords.contains(word)) { |
---|
| 334 | errors++; |
---|
| 335 | //Is this word being automagically replaced |
---|
| 336 | // if (autoReplaceWords.containsKey(word)) { |
---|
| 337 | // tokenizer.replaceWord((String) autoReplaceWords.get(word)); |
---|
| 338 | // } else { |
---|
| 339 | //JMH Need to somehow capitalise the suggestions if |
---|
| 340 | //ignoreSentenceCapitalisation is not set to true |
---|
| 341 | //Fire the event. |
---|
| 342 | SpellCheckEvent event = new BasicSpellCheckEvent(word, null, tokenizer.getCurrentWordPosition()); |
---|
| 343 | terminated = fireAndHandleEvent(tokenizer, event); |
---|
| 344 | } |
---|
| 345 | // } |
---|
| 346 | // } |
---|
| 347 | } else { |
---|
| 348 | //This is a correctly spelt word. However perform some extra checks |
---|
| 349 | /* |
---|
| 350 | * JMH TBD //Check for multiple words |
---|
| 351 | * if (!ignoreMultipleWords &&) { |
---|
| 352 | * } |
---|
| 353 | */ |
---|
| 354 | //Check for capitalisation (not interesting Eric) |
---|
| 355 | // if ((!config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION)) && (tokenizer.isNewSentence()) && (Character.isLowerCase(word.charAt(0)))) { |
---|
| 356 | // errors++; |
---|
| 357 | // StringBuffer buf = new StringBuffer(word); |
---|
| 358 | // buf.setCharAt(0, Character.toUpperCase(word.charAt(0))); |
---|
| 359 | // Vector suggestion = new Vector(); |
---|
| 360 | // suggestion.addElement(new Word(buf.toString(), 0)); |
---|
| 361 | // SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestion, tokenizer); |
---|
| 362 | // terminated = fireAndHandleEvent(tokenizer, event); |
---|
| 363 | // } |
---|
| 364 | } |
---|
| 365 | } |
---|
| 366 | if (terminated) |
---|
| 367 | return SPELLCHECK_CANCEL; |
---|
| 368 | else if (errors == 0) |
---|
| 369 | return SPELLCHECK_OK; |
---|
| 370 | else |
---|
| 371 | return errors; |
---|
| 372 | } |
---|
| 373 | |
---|
| 374 | } |
---|
| 375 | |
---|
| 376 | |
---|