1 | package com.swabunga.spell.event; |
---|
2 | |
---|
3 | import com.swabunga.spell.engine.Configuration; |
---|
4 | import com.swabunga.spell.engine.SpellDictionary; |
---|
5 | import com.swabunga.spell.engine.SpellDictionaryHashMap; |
---|
6 | import com.swabunga.spell.engine.Word; |
---|
7 | import com.swabunga.util.VectorUtility; |
---|
8 | |
---|
9 | import java.io.IOException; |
---|
10 | import java.util.Enumeration; |
---|
11 | import java.util.Hashtable; |
---|
12 | import java.util.List; |
---|
13 | import java.util.Vector; |
---|
14 | |
---|
15 | |
---|
16 | /** |
---|
17 | * This is the main class for spell checking (using the new event based spell |
---|
18 | * checking). |
---|
19 | * |
---|
20 | * @author Jason Height (jheight@chariot.net.au) |
---|
21 | * 19 June 2002 |
---|
22 | */ |
---|
23 | public class SpellChecker { |
---|
24 | /** Flag indicating that the Spell Check completed without any errors present*/ |
---|
25 | public static final int SPELLCHECK_OK = -1; |
---|
26 | /** Flag indicating that the Spell Check completed due to user cancellation*/ |
---|
27 | public static final int SPELLCHECK_CANCEL = -2; |
---|
28 | |
---|
29 | private Vector eventListeners = new Vector(); |
---|
30 | private Vector dictionaries = new Vector(); |
---|
31 | // private SpellDictionary userdictionary; |
---|
32 | |
---|
33 | private Configuration config = Configuration.getConfiguration(); |
---|
34 | |
---|
35 | /**This variable holds all of the words that are to be always ignored */ |
---|
36 | // private Vector ignoredWords = new Vector(); |
---|
37 | // private Hashtable autoReplaceWords = new Hashtable(); |
---|
38 | |
---|
39 | /** |
---|
40 | * Constructs the SpellChecker. |
---|
41 | */ |
---|
42 | public SpellChecker() { |
---|
43 | // try { |
---|
44 | // userdictionary = new SpellDictionaryHashMap(); |
---|
45 | // } catch (IOException e) { |
---|
46 | // throw new RuntimeException("this exception should never happen because we are using null phonetic file"); |
---|
47 | // } |
---|
48 | } |
---|
49 | |
---|
/**
 * Creates a spell checker and registers one dictionary with it. The
 * configuration (including the threshold) is left untouched.
 *
 * @param dictionary the dictionary to register; passed straight to
 *                   {@code addDictionary}, which rejects {@code null}
 */
public SpellChecker(SpellDictionary dictionary) {
    this();
    addDictionary(dictionary);
}
---|
59 | |
---|
60 | |
---|
/**
 * Creates a spell checker, registers one dictionary with it and stores the
 * given threshold in the configuration.
 *
 * @param dictionary the dictionary to register; passed straight to
 *                   {@code addDictionary}, which rejects {@code null}
 * @param threshold  value stored under {@code Configuration.SPELL_THRESHOLD}
 */
public SpellChecker(SpellDictionary dictionary, int threshold) {
    this(dictionary);
    // NOTE(review): config comes from Configuration.getConfiguration(); if that
    // instance is shared, this setting is visible outside this checker - confirm.
    config.setInteger(Configuration.SPELL_THRESHOLD, threshold);
}
---|
71 | |
---|
72 | public void addDictionary(SpellDictionary dictionary) { |
---|
73 | if (dictionary == null) { |
---|
74 | throw new IllegalArgumentException("dictionary must be non-null"); |
---|
75 | } |
---|
76 | this.dictionaries.addElement(dictionary); |
---|
77 | } |
---|
78 | |
---|
79 | /** |
---|
80 | *Adds a SpellCheckListener |
---|
81 | * |
---|
82 | * @param listener The feature to be added to the SpellCheckListener attribute |
---|
83 | */ |
---|
84 | public void addSpellCheckListener(SpellCheckListener listener) { |
---|
85 | eventListeners.addElement(listener); |
---|
86 | } |
---|
87 | |
---|
88 | |
---|
89 | /** |
---|
90 | *Removes a SpellCheckListener |
---|
91 | * |
---|
92 | * @param listener Description of the Parameter |
---|
93 | */ |
---|
94 | public void removeSpellCheckListener(SpellCheckListener listener) { |
---|
95 | eventListeners.removeElement(listener); |
---|
96 | } |
---|
97 | |
---|
98 | /* |
---|
99 | * Set user dictionary (used when a word is added) |
---|
100 | * |
---|
101 | public void setUserDictionary(SpellDictionary dictionary) { |
---|
102 | userdictionary = dictionary; |
---|
103 | } |
---|
104 | |
---|
105 | /* |
---|
106 | * |
---|
107 | * @return Current Configuration |
---|
108 | * |
---|
109 | public Configuration getConfiguration() { |
---|
110 | return config; |
---|
111 | } |
---|
112 | |
---|
113 | /** |
---|
114 | * Fires off a spell check event to the listeners. |
---|
115 | * |
---|
116 | * @param event Description of the Parameter |
---|
117 | */ |
---|
118 | protected void fireSpellCheckEvent(SpellCheckEvent event) { |
---|
119 | for (int i = eventListeners.size() - 1; i >= 0; i--) { |
---|
120 | ((SpellCheckListener) eventListeners.elementAt(i)).spellingError(event); |
---|
121 | } |
---|
122 | } |
---|
123 | |
---|
124 | |
---|
125 | /* |
---|
126 | * This method clears the words that are currently being remembered as |
---|
127 | * Ignore All words and Replace All words. |
---|
128 | * |
---|
129 | public void reset() { |
---|
130 | ignoredWords = new Vector(); |
---|
131 | autoReplaceWords = new Hashtable(); |
---|
132 | } |
---|
133 | */ |
---|
134 | |
---|
135 | /** |
---|
136 | * Checks the text string. |
---|
137 | * <p> |
---|
138 | * Returns the corrected string. |
---|
139 | * |
---|
140 | * @param text Description of the Parameter |
---|
141 | * @return Description of the Return Value |
---|
142 | * @deprecated use checkSpelling(WordTokenizer) |
---|
143 | */ |
---|
144 | /* |
---|
145 | public String checkString(String text) { |
---|
146 | StringWordTokenizer tokens = new StringWordTokenizer(text); |
---|
147 | checkSpelling(tokens); |
---|
148 | return tokens.getContext(); |
---|
149 | } |
---|
150 | */ |
---|
151 | |
---|
152 | /* |
---|
153 | * Returns true iff this word contains a digit. |
---|
154 | * |
---|
155 | * @param word Description of the Parameter |
---|
156 | * @return The digitWord value |
---|
157 | */ |
---|
158 | private final static boolean isDigitWord(String word) { |
---|
159 | for (int i = word.length() - 1; i >= 0; i--) { |
---|
160 | if (Character.isDigit(word.charAt(i))) { |
---|
161 | return true; |
---|
162 | } |
---|
163 | } |
---|
164 | return false; |
---|
165 | } |
---|
166 | |
---|
167 | |
---|
168 | /** |
---|
169 | * Returns true iff this word looks like an internet address. |
---|
170 | * |
---|
171 | * One limitation is that this method cannot currently recognise email |
---|
172 | * addresses. Since the 'word' that is passed in may in fact contain |
---|
173 | * the rest of the document to be checked, it is not (yet!) a good |
---|
174 | * idea to scan for the @ character. |
---|
175 | * |
---|
176 | * @param word Description of the Parameter |
---|
177 | * @return The iNETWord value |
---|
178 | */ |
---|
179 | public final static boolean isINETWord(String word) { |
---|
180 | String lowerCaseWord = word.toLowerCase(); |
---|
181 | return lowerCaseWord.startsWith("http://") || |
---|
182 | lowerCaseWord.startsWith("www.") || |
---|
183 | lowerCaseWord.startsWith("ftp://") || |
---|
184 | lowerCaseWord.startsWith("https://") || |
---|
185 | lowerCaseWord.startsWith("ftps://"); |
---|
186 | } |
---|
187 | |
---|
188 | |
---|
189 | /** |
---|
190 | * Returns true iif this word contains all upper case characters |
---|
191 | * |
---|
192 | * @param word Description of the Parameter |
---|
193 | * @return The upperCaseWord value |
---|
194 | */ |
---|
195 | private final static boolean isUpperCaseWord(String word) { |
---|
196 | for (int i = word.length() - 1; i >= 0; i--) { |
---|
197 | if (Character.isLowerCase(word.charAt(i))) { |
---|
198 | return false; |
---|
199 | } |
---|
200 | } |
---|
201 | return true; |
---|
202 | } |
---|
203 | |
---|
204 | |
---|
205 | /** |
---|
206 | * Returns true iif this word contains mixed case characters |
---|
207 | * |
---|
208 | * @param word Description of the Parameter |
---|
209 | * @param startsSentence True if this word is at the start of a sentence |
---|
210 | * @return The mixedCaseWord value |
---|
211 | */ |
---|
212 | private final static boolean isMixedCaseWord(String word, boolean startsSentence) { |
---|
213 | int strLen = word.length(); |
---|
214 | boolean isUpper = Character.isUpperCase(word.charAt(0)); |
---|
215 | //Ignore the first character if this word starts the sentence and the first |
---|
216 | //character was upper cased, since this is normal behaviour |
---|
217 | if ((startsSentence) && isUpper && (strLen > 1)) |
---|
218 | isUpper = Character.isUpperCase(word.charAt(1)); |
---|
219 | if (isUpper) { |
---|
220 | for (int i = word.length() - 1; i > 0; i--) { |
---|
221 | if (Character.isLowerCase(word.charAt(i))) { |
---|
222 | return true; |
---|
223 | } |
---|
224 | } |
---|
225 | } else { |
---|
226 | for (int i = word.length() - 1; i > 0; i--) { |
---|
227 | if (Character.isUpperCase(word.charAt(i))) { |
---|
228 | return true; |
---|
229 | } |
---|
230 | } |
---|
231 | } |
---|
232 | return false; |
---|
233 | } |
---|
234 | |
---|
235 | |
---|
236 | /* |
---|
237 | * This method will fire the spell check event and then handle the event |
---|
238 | * action that has been selected by the user. |
---|
239 | * |
---|
240 | * @param tokenizer Description of the Parameter |
---|
241 | * @param event Description of the Parameter |
---|
242 | * @return Returns true if the event action is to cancel the current spell checking, false if the spell checking should continue |
---|
243 | */ |
---|
244 | protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) { |
---|
245 | fireSpellCheckEvent(event); |
---|
246 | /* |
---|
247 | String word = event.getInvalidWord(); |
---|
248 | //Work out what to do in response to the event. |
---|
249 | switch (event.getAction()) { |
---|
250 | case SpellCheckEvent.INITIAL: |
---|
251 | break; |
---|
252 | case SpellCheckEvent.IGNORE: |
---|
253 | break; |
---|
254 | case SpellCheckEvent.IGNOREALL: |
---|
255 | if (!ignoredWords.contains(word)) { |
---|
256 | ignoredWords.addElement(word); |
---|
257 | } |
---|
258 | break; |
---|
259 | case SpellCheckEvent.REPLACE: |
---|
260 | tokenizer.replaceWord(event.getReplaceWord()); |
---|
261 | break; |
---|
262 | case SpellCheckEvent.REPLACEALL: |
---|
263 | String replaceAllWord = event.getReplaceWord(); |
---|
264 | if (!autoReplaceWords.containsKey(word)) { |
---|
265 | autoReplaceWords.put(word, replaceAllWord); |
---|
266 | } |
---|
267 | tokenizer.replaceWord(replaceAllWord); |
---|
268 | break; |
---|
269 | case SpellCheckEvent.ADDTODICT: |
---|
270 | String addWord = event.getReplaceWord(); |
---|
271 | if (!addWord.equals(word)) |
---|
272 | tokenizer.replaceWord(addWord); |
---|
273 | userdictionary.addWord(addWord); |
---|
274 | break; |
---|
275 | case SpellCheckEvent.CANCEL: |
---|
276 | return true; |
---|
277 | default: |
---|
278 | throw new IllegalArgumentException("Unhandled case."); |
---|
279 | } |
---|
280 | */ |
---|
281 | return false; |
---|
282 | } |
---|
283 | |
---|
284 | |
---|
285 | |
---|
286 | private boolean isCorrect(String word) { |
---|
287 | // if (userdictionary.isCorrect(word)) return true; |
---|
288 | for (Enumeration e = dictionaries.elements(); e.hasMoreElements();) { |
---|
289 | SpellDictionary dictionary = (SpellDictionary) e.nextElement(); |
---|
290 | if (dictionary.isCorrect(word)) return true; |
---|
291 | } |
---|
292 | return false; |
---|
293 | } |
---|
294 | |
---|
295 | |
---|
296 | public List getSuggestions(String word, int threshold) { |
---|
297 | List suggestions = new Vector();//userdictionary.getSuggestions(word, threshold); |
---|
298 | for (Enumeration e = dictionaries.elements(); e.hasMoreElements();) { |
---|
299 | SpellDictionary dictionary = (SpellDictionary) e.nextElement(); |
---|
300 | VectorUtility.addAll(suggestions, dictionary.getSuggestions(word, threshold), false); |
---|
301 | } |
---|
302 | return suggestions; |
---|
303 | } |
---|
304 | |
---|
305 | |
---|
306 | /** |
---|
307 | * This method is called to check the spelling of the words that are returned |
---|
308 | * by the WordTokenizer. |
---|
309 | * <p>For each invalid word the action listeners will be informed with a new SpellCheckEvent</p> |
---|
310 | * |
---|
311 | * @param tokenizer Description of the Parameter |
---|
312 | * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of errors are those that |
---|
313 | * are found BEFORE any corrections are made. |
---|
314 | */ |
---|
315 | public final int checkSpelling(WordTokenizer tokenizer) { |
---|
316 | int errors = 0; |
---|
317 | boolean terminated = false; |
---|
318 | //Keep track of the previous word |
---|
319 | // String previousWord = null; |
---|
320 | while (tokenizer.hasMoreWords() && !terminated) { |
---|
321 | String word = tokenizer.nextWord(); |
---|
322 | //Check the spelling of the word |
---|
323 | if (!isCorrect(word)) { |
---|
324 | if ((config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE) && isMixedCaseWord(word, tokenizer.isNewSentence())) || |
---|
325 | (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word)) || |
---|
326 | (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word)) || |
---|
327 | (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES) && isINETWord(word))) { |
---|
328 | //Null event. Since we are ignoring this word due |
---|
329 | //to one of the above cases. |
---|
330 | } else { |
---|
331 | //We cant ignore this misspelt word |
---|
332 | //For this invalid word are we ignoring the misspelling? |
---|
333 | // if (!ignoredWords.contains(word)) { |
---|
334 | errors++; |
---|
335 | //Is this word being automagically replaced |
---|
336 | // if (autoReplaceWords.containsKey(word)) { |
---|
337 | // tokenizer.replaceWord((String) autoReplaceWords.get(word)); |
---|
338 | // } else { |
---|
339 | //JMH Need to somehow capitalise the suggestions if |
---|
340 | //ignoreSentenceCapitalisation is not set to true |
---|
341 | //Fire the event. |
---|
342 | SpellCheckEvent event = new BasicSpellCheckEvent(word, null, tokenizer.getCurrentWordPosition()); |
---|
343 | terminated = fireAndHandleEvent(tokenizer, event); |
---|
344 | } |
---|
345 | // } |
---|
346 | // } |
---|
347 | } else { |
---|
348 | //This is a correctly spelt word. However perform some extra checks |
---|
349 | /* |
---|
350 | * JMH TBD //Check for multiple words |
---|
351 | * if (!ignoreMultipleWords &&) { |
---|
352 | * } |
---|
353 | */ |
---|
354 | //Check for capitalisation (not interesting Eric) |
---|
355 | // if ((!config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION)) && (tokenizer.isNewSentence()) && (Character.isLowerCase(word.charAt(0)))) { |
---|
356 | // errors++; |
---|
357 | // StringBuffer buf = new StringBuffer(word); |
---|
358 | // buf.setCharAt(0, Character.toUpperCase(word.charAt(0))); |
---|
359 | // Vector suggestion = new Vector(); |
---|
360 | // suggestion.addElement(new Word(buf.toString(), 0)); |
---|
361 | // SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestion, tokenizer); |
---|
362 | // terminated = fireAndHandleEvent(tokenizer, event); |
---|
363 | // } |
---|
364 | } |
---|
365 | } |
---|
366 | if (terminated) |
---|
367 | return SPELLCHECK_CANCEL; |
---|
368 | else if (errors == 0) |
---|
369 | return SPELLCHECK_OK; |
---|
370 | else |
---|
371 | return errors; |
---|
372 | } |
---|
373 | |
---|
374 | } |
---|
375 | |
---|
376 | |
---|