source: branches/2.2/jabberit_messenger/java_source/src/com/swabunga/spell/event/SpellChecker.java @ 3102

Revision 3102, 11.9 KB checked in by amuller, 14 years ago (diff)

Ticket #986 - Efetuado merge para o Branch 2.2( atualizacao do modulo)

  • Property svn:executable set to *
Line 
1package com.swabunga.spell.event;
2
3import com.swabunga.spell.engine.Configuration;
4import com.swabunga.spell.engine.SpellDictionary;
5import com.swabunga.spell.engine.SpellDictionaryHashMap;
6import com.swabunga.spell.engine.Word;
7import com.swabunga.util.VectorUtility;
8
9import java.io.IOException;
10import java.util.Enumeration;
11import java.util.Hashtable;
12import java.util.List;
13import java.util.Vector;
14
15
16/**
17 * This is the main class for spell checking (using the new event based spell
18 *  checking).
19 *
20 * @author     Jason Height (jheight@chariot.net.au)
21 * 19 June 2002
22 */
23public class SpellChecker {
  /**
   * Flag indicating that the Spell Check completed without any errors present.
   * Negative so that it cannot be confused with an error count returned by
   * checkSpelling.
   */
  public static final int SPELLCHECK_OK = -1;
  /**
   * Flag indicating that the Spell Check completed due to user cancellation.
   * Negative so that it cannot be confused with an error count returned by
   * checkSpelling.
   */
  public static final int SPELLCHECK_CANCEL = -2;

  /** Registered SpellCheckListener instances that are notified of each spelling error. */
  private Vector eventListeners = new Vector();
  /** SpellDictionary instances consulted, in insertion order, when checking a word. */
  private Vector dictionaries = new Vector();
//  private SpellDictionary userdictionary;

  /** Configuration obtained from Configuration.getConfiguration(); supplies the SPELL_* options. */
  private Configuration config = Configuration.getConfiguration();
34
35  /**This variable holds all of the words that are to be always ignored */
36//  private Vector ignoredWords = new Vector();
37//  private Hashtable autoReplaceWords = new Hashtable();
38
39  /**
40   * Constructs the SpellChecker.
41   */
42  public SpellChecker() {
43   // try {
44 //     userdictionary = new SpellDictionaryHashMap();
45   // } catch (IOException e) {
46  //    throw new RuntimeException("this exception should never happen because we are using null phonetic file");
47  //  }
48  }
49
50  /**
51   * Constructs the SpellChecker. The default threshold is used
52   *
53   * @param  dictionary  Description of the Parameter
54   */
55  public SpellChecker(SpellDictionary dictionary) {
56    this();
57    addDictionary(dictionary);
58  }
59
60
61  /**
62   * Constructs the SpellChecker with a threshold
63   *
64   * @param  dictionary  Description of the Parameter
65   * @param  threshold   Description of the Parameter
66   */
67  public SpellChecker(SpellDictionary dictionary, int threshold) {
68    this(dictionary);
69    config.setInteger(Configuration.SPELL_THRESHOLD, threshold);
70  }
71
72  public void addDictionary(SpellDictionary dictionary) {
73    if (dictionary == null) {
74      throw new IllegalArgumentException("dictionary must be non-null");
75    }
76    this.dictionaries.addElement(dictionary);
77  }
78 
79  /**
80   *Adds a SpellCheckListener
81   *
82   * @param  listener  The feature to be added to the SpellCheckListener attribute
83   */
84  public void addSpellCheckListener(SpellCheckListener listener) {
85    eventListeners.addElement(listener);
86  }
87
88
89  /**
90   *Removes a SpellCheckListener
91   *
92   * @param  listener  Description of the Parameter
93   */
94  public void removeSpellCheckListener(SpellCheckListener listener) {
95    eventListeners.removeElement(listener);
96  }
97
98  /*
99   * Set user dictionary (used when a word is added)
100   *
101  public void setUserDictionary(SpellDictionary dictionary) {
102    userdictionary = dictionary;
103  }
104
105  /*
106   *
107   * @return Current Configuration
108   *
109  public Configuration getConfiguration() {
110    return config;
111  }
112
113  /**
114   * Fires off a spell check event to the listeners.
115   *
116   * @param  event  Description of the Parameter
117   */
118  protected void fireSpellCheckEvent(SpellCheckEvent event) {
119    for (int i = eventListeners.size() - 1; i >= 0; i--) {
120      ((SpellCheckListener) eventListeners.elementAt(i)).spellingError(event);
121    }
122  }
123
124
125  /*
126   * This method clears the words that are currently being remembered as
127   *  Ignore All words and Replace All words.
128   *
129  public void reset() {
130    ignoredWords = new Vector();
131    autoReplaceWords = new Hashtable();
132  }
133*/
134
135  /**
136   * Checks the text string.
137   *  <p>
138   *  Returns the corrected string.
139   *
140   * @param  text   Description of the Parameter
141   * @return        Description of the Return Value
142   * @deprecated    use checkSpelling(WordTokenizer)
143   */
144  /*
145  public String checkString(String text) {
146    StringWordTokenizer tokens = new StringWordTokenizer(text);
147    checkSpelling(tokens);
148    return tokens.getContext();
149  }
150*/
151
152  /*
153   * Returns true iff this word contains a digit.
154   *
155   * @param  word  Description of the Parameter
156   * @return       The digitWord value
157   */
158  private final static boolean isDigitWord(String word) {
159    for (int i = word.length() - 1; i >= 0; i--) {
160      if (Character.isDigit(word.charAt(i))) {
161        return true;
162      }
163    }
164    return false;
165  }
166
167
168  /**
169   * Returns true iff this word looks like an internet address.
170   *
171   * One limitation is that this method cannot currently recognise email
172   * addresses. Since the 'word' that is passed in may in fact contain
173   * the rest of the document to be checked, it is not (yet!) a good
174   * idea to scan for the @ character.
175   *
176   * @param  word  Description of the Parameter
177   * @return       The iNETWord value
178   */
179    public final static boolean isINETWord(String word) {
180        String lowerCaseWord = word.toLowerCase();
181        return lowerCaseWord.startsWith("http://") ||
182              lowerCaseWord.startsWith("www.") ||
183              lowerCaseWord.startsWith("ftp://") ||
184              lowerCaseWord.startsWith("https://") ||
185              lowerCaseWord.startsWith("ftps://");
186  }
187
188
189  /**
190   * Returns true iif this word contains all upper case characters
191   *
192   * @param  word  Description of the Parameter
193   * @return       The upperCaseWord value
194   */
195  private final static boolean isUpperCaseWord(String word) {
196    for (int i = word.length() - 1; i >= 0; i--) {
197      if (Character.isLowerCase(word.charAt(i))) {
198        return false;
199      }
200    }
201    return true;
202  }
203
204
205  /**
206   * Returns true iif this word contains mixed case characters
207   *
208   * @param  word  Description of the Parameter
209   * @param startsSentence True if this word is at the start of a sentence
210   * @return       The mixedCaseWord value
211   */
212  private final static boolean isMixedCaseWord(String word, boolean startsSentence) {
213    int strLen = word.length();
214    boolean isUpper = Character.isUpperCase(word.charAt(0));
215    //Ignore the first character if this word starts the sentence and the first
216    //character was upper cased, since this is normal behaviour
217    if ((startsSentence) && isUpper && (strLen > 1))
218      isUpper = Character.isUpperCase(word.charAt(1));
219    if (isUpper) {
220      for (int i = word.length() - 1; i > 0; i--) {
221        if (Character.isLowerCase(word.charAt(i))) {
222          return true;
223        }
224      }
225    } else {
226      for (int i = word.length() - 1; i > 0; i--) {
227        if (Character.isUpperCase(word.charAt(i))) {
228          return true;
229        }
230      }
231    }
232    return false;
233  }
234
235
236  /*
237   * This method will fire the spell check event and then handle the event
238   *  action that has been selected by the user.
239   *
240   * @param  tokenizer        Description of the Parameter
241   * @param  event            Description of the Parameter
242   * @return                  Returns true if the event action is to cancel the current spell checking, false if the spell checking should continue
243   */
244  protected boolean fireAndHandleEvent(WordTokenizer tokenizer, SpellCheckEvent event) {
245    fireSpellCheckEvent(event);
246    /*
247    String word = event.getInvalidWord();
248    //Work out what to do in response to the event.
249    switch (event.getAction()) {
250      case SpellCheckEvent.INITIAL:
251        break;
252      case SpellCheckEvent.IGNORE:
253        break;
254      case SpellCheckEvent.IGNOREALL:
255        if (!ignoredWords.contains(word)) {
256          ignoredWords.addElement(word);
257        }
258        break;
259      case SpellCheckEvent.REPLACE:
260        tokenizer.replaceWord(event.getReplaceWord());
261        break;
262      case SpellCheckEvent.REPLACEALL:
263        String replaceAllWord = event.getReplaceWord();
264        if (!autoReplaceWords.containsKey(word)) {
265          autoReplaceWords.put(word, replaceAllWord);
266        }
267        tokenizer.replaceWord(replaceAllWord);
268        break;
269      case SpellCheckEvent.ADDTODICT:
270        String addWord = event.getReplaceWord();
271        if (!addWord.equals(word))
272          tokenizer.replaceWord(addWord);
273        userdictionary.addWord(addWord);
274        break;
275      case SpellCheckEvent.CANCEL:
276        return true;
277      default:
278        throw new IllegalArgumentException("Unhandled case.");
279    }
280    */
281    return false;
282  }
283 
284
285 
286  private boolean isCorrect(String word) {
287   // if (userdictionary.isCorrect(word)) return true;
288    for (Enumeration e = dictionaries.elements(); e.hasMoreElements();) {
289      SpellDictionary dictionary = (SpellDictionary) e.nextElement();
290      if (dictionary.isCorrect(word)) return true;
291    }
292    return false;
293  }
294 
295
296  public List getSuggestions(String word, int threshold) {
297    List suggestions = new Vector();//userdictionary.getSuggestions(word, threshold);
298    for (Enumeration e = dictionaries.elements(); e.hasMoreElements();) {
299      SpellDictionary dictionary = (SpellDictionary) e.nextElement();
300      VectorUtility.addAll(suggestions, dictionary.getSuggestions(word, threshold), false);
301    }
302    return suggestions;
303  }
304
305 
306  /**
307   * This method is called to check the spelling of the words that are returned
308   * by the WordTokenizer.
309   * <p>For each invalid word the action listeners will be informed with a new SpellCheckEvent</p>
310   *
311   * @param  tokenizer  Description of the Parameter
312   * @return Either SPELLCHECK_OK, SPELLCHECK_CANCEL or the number of errors found. The number of errors are those that
313   * are found BEFORE any corrections are made.
314   */
315  public final int checkSpelling(WordTokenizer tokenizer) {
316    int errors = 0;
317    boolean terminated = false;
318    //Keep track of the previous word
319//    String previousWord = null;
320    while (tokenizer.hasMoreWords() && !terminated) {
321      String word = tokenizer.nextWord();
322      //Check the spelling of the word
323      if (!isCorrect(word)) {
324                if ((config.getBoolean(Configuration.SPELL_IGNOREMIXEDCASE) && isMixedCaseWord(word, tokenizer.isNewSentence())) ||
325            (config.getBoolean(Configuration.SPELL_IGNOREUPPERCASE) && isUpperCaseWord(word)) ||
326            (config.getBoolean(Configuration.SPELL_IGNOREDIGITWORDS) && isDigitWord(word)) ||
327            (config.getBoolean(Configuration.SPELL_IGNOREINTERNETADDRESSES) && isINETWord(word))) {
328          //Null event. Since we are ignoring this word due
329          //to one of the above cases.
330        } else {
331          //We cant ignore this misspelt word
332          //For this invalid word are we ignoring the misspelling?
333      //    if (!ignoredWords.contains(word)) {
334            errors++;
335            //Is this word being automagically replaced
336      //      if (autoReplaceWords.containsKey(word)) {
337       //       tokenizer.replaceWord((String) autoReplaceWords.get(word));
338       //     } else {
339              //JMH Need to somehow capitalise the suggestions if
340              //ignoreSentenceCapitalisation is not set to true
341              //Fire the event.
342              SpellCheckEvent event = new BasicSpellCheckEvent(word, null, tokenizer.getCurrentWordPosition());
343              terminated = fireAndHandleEvent(tokenizer, event);
344            }
345        //  }
346       // }
347      } else {
348        //This is a correctly spelt word. However perform some extra checks
349        /*
350         *  JMH TBD          //Check for multiple words
351         *  if (!ignoreMultipleWords &&) {
352         *  }
353         */
354        //Check for capitalisation (not interesting Eric)
355//        if ((!config.getBoolean(Configuration.SPELL_IGNORESENTENCECAPITALIZATION)) && (tokenizer.isNewSentence()) && (Character.isLowerCase(word.charAt(0)))) {
356//          errors++;
357//          StringBuffer buf = new StringBuffer(word);
358//          buf.setCharAt(0, Character.toUpperCase(word.charAt(0)));
359//          Vector suggestion = new Vector();
360//          suggestion.addElement(new Word(buf.toString(), 0));
361//          SpellCheckEvent event = new BasicSpellCheckEvent(word, suggestion, tokenizer);
362//          terminated = fireAndHandleEvent(tokenizer, event);
363//        }
364      }
365    }
366    if (terminated)
367      return SPELLCHECK_CANCEL;
368    else if (errors == 0)
369      return SPELLCHECK_OK;
370    else
371      return errors;
372  }
373 
374}
375
376
Note: See TracBrowser for help on using the repository browser.