source: 3thparty/jmessenger/src/com/swabunga/spell/engine/SpellDictionaryHashMap.java @ 3952

Revision 3952, 7.3 KB checked in by alexandrecorreia, 13 years ago (diff)

Ticket #1710 - Adicao do codigo fonte java do componente jmessenger(jabberit_messenger)

  • Property svn:executable set to *
Line 
/*
 * In-memory, hash-table-backed word list dictionary for the spell checking engine.
 * formatted with JxBeauty (c) johann.langhofer@nextra.at
 */
5
6package com.swabunga.spell.engine;
7
8import java.io.*;
9import java.util.Hashtable;
10import java.util.List;
11import java.util.Vector;
12
13/**
14 * The SpellDictionaryHashMap holds the dictionary
15 * <p/>
16 * This class is thread safe. Derived classes should ensure that this preserved.
17 * <p/>
18 * There are many open source dictionary files. For just a few see:
19 * http://wordlist.sourceforge.net/
20 * <p/>
21 * This dictionary class reads words one per line. Make sure that your word list
22 * is formatted in this way (most are).
23 */
24public class SpellDictionaryHashMap extends SpellDictionaryASpell {
25  /** A field indicating the initial hash map capacity (16KB) for the main
26   *  dictionary hash map. Interested to see what the performance of a
27   *  smaller initial capacity is like.
28   */
29  private final static int INITIAL_CAPACITY = 16 * 1024;
30
31  /**
32   * The hashmap that contains the word dictionary. The map is hashed on the doublemeta
33   * code. The map entry contains a LinkedList of words that have the same double meta code.
34   */
35  protected Hashtable mainDictionary = new Hashtable(INITIAL_CAPACITY);
36
37  /** Holds the dictionary file for appending*/
38  private File dictFile = null;
39
40  /**
41   * Dictionary Constructor.
42   */
43  public SpellDictionaryHashMap() throws IOException {
44    super((File) null);
45  }
46
47  /**
48   * Dictionary Constructor.
49   */
50  public SpellDictionaryHashMap(Reader wordList) throws IOException {
51    super((File) null);
52    createDictionary(new BufferedReader(wordList));
53  }
54
55  /**
56   * Dictionary Convienence Constructor.
57   */
58  public SpellDictionaryHashMap(File wordList) throws FileNotFoundException, IOException {
59    this(new FileReader(wordList));
60    dictFile = wordList;
61  }
62
63  /**
64   * Dictionary constructor that uses an aspell phonetic file to
65   * build the transformation table.
66   */
67  public SpellDictionaryHashMap(File wordList, File phonetic) throws FileNotFoundException, IOException {
68    super(phonetic);
69    dictFile = wordList;
70    createDictionary(new BufferedReader(new FileReader(wordList)));
71  }
72
73  /**
74   * Dictionary constructor that uses an aspell phonetic file to
75   * build the transformation table.
76   * encoding is used for phonetic file only; default encoding is used for wordList
77   */
78  public SpellDictionaryHashMap(File wordList, File phonetic, String phoneticEncoding) throws FileNotFoundException, IOException {
79    super(phonetic, phoneticEncoding);
80    dictFile = wordList;
81    createDictionary(new BufferedReader(new FileReader(wordList)));
82  }
83
84  /**
85   * Dictionary constructor that uses an aspell phonetic file to
86   * build the transformation table.
87   */
88  public SpellDictionaryHashMap(Reader wordList, Reader phonetic) throws IOException {
89    super(phonetic);
90    dictFile = null;
91    createDictionary(new BufferedReader(wordList));
92  }
93
94  /**
95   * Add words from a file to existing dictionary hashmap.
96   * This function can be called as many times as needed to
97   * build the internal word list. Duplicates are not added.
98   * <p>
99   * Note that adding a dictionary does not affect the target
100   * dictionary file for the addWord method. That is, addWord() continues
101   * to make additions to the dictionary file specified in createDictionary()
102   * <P>
103   * @param wordList a File object that contains the words, on word per line.
104   * @throws FileNotFoundException
105   * @throws IOException
106   */
107  public void addDictionary(File wordList) throws FileNotFoundException, IOException {
108    addDictionaryHelper(new BufferedReader(new FileReader(wordList)));
109  }
110
111  public void addDictionary(Reader wordList) throws IOException {
112    addDictionaryHelper(new BufferedReader(wordList));
113  }
114
115  /**
116   * Add a word permanantly to the dictionary (and the dictionary file).
117   * <p>This needs to be made thread safe (synchronized)</p>
118   */
119  public void addWord(String word) {
120    putWord(word);
121    if (dictFile == null)
122      return;
123    try {
124      FileWriter w = new FileWriter(dictFile.toString(), true);
125      // Open with append.
126      w.write(word);
127      w.write("\n");
128      w.close();
129    } catch (IOException ex) {
130      System.out.println("Error writing to dictionary file");
131    }
132  }
133
134  /**
135   * Constructs the dictionary from a word list file.
136   * <p>
137   * Each word in the reader should be on a seperate line.
138   * <p>
139   * This is a very slow function. On my machine it takes quite a while to
140   * load the data in. I suspect that we could speed this up quite alot.
141   */
142  protected void createDictionary(BufferedReader in) throws IOException {
143    String line = "";
144    while (line != null) {
145      line = in.readLine();
146      if (line != null && line.length() > 0) {
147        line = new String(line.toCharArray());
148        putWord(line);
149      }
150    }
151  }
152
153  /**
154   * Adds to the existing dictionary from a word list file. If the word
155   * already exists in the dictionary, a new entry is not added.
156   * <p>
157   * Each word in the reader should be on a seperate line.
158   * <p>
159   * Note: for whatever reason that I haven't yet looked into, the phonetic codes
160   * for a particular word map to a vector of words rather than a hash table.
161   * This is a drag since in order to check for duplicates you have to iterate
162   * through all the words that use the phonetic code.
163   * If the vector-based implementation is important, it may be better
164   * to subclass for the cases where duplicates are bad.
165   */
166  protected void addDictionaryHelper(BufferedReader in) throws IOException {
167
168    String line = "";
169    while (line != null) {
170      line = in.readLine();
171      if (line != null && line.length() > 0) {
172        line = new String(line.toCharArray());
173        putWordUnique(line);
174      }
175    }
176  }
177
178  /**
179   * Allocates a word in the dictionary
180   */
181  protected void putWord(String word) {
182    String code = getCode(word);
183    Vector list = (Vector) mainDictionary.get(code);
184    if (list != null) {
185      list.addElement(word);
186    } else {
187      list = new Vector();
188      list.addElement(word);
189      mainDictionary.put(code, list);
190    }
191  }
192
193  protected void putWordUnique(String word) {
194
195    String code = getCode(word);
196    Vector list = (Vector) mainDictionary.get(code);
197
198    if (list != null) {
199
200      boolean isAlready = false;
201
202      for (int i = 0; i < list.size(); i++) {
203
204        if (word.equalsIgnoreCase((String) list.elementAt(i))) {
205          isAlready = true;
206          break;
207        }
208      }
209
210      if (!isAlready)
211        list.addElement(word);
212
213    } else {
214
215      list = new Vector();
216      list.addElement(word);
217      mainDictionary.put(code, list);
218
219    }
220  }
221
222  /**
223   * Returns a list of strings (words) for the code.
224   */
225  public List getWords(String code) {
226    //Check the main dictionary.
227    Vector mainDictResult = (Vector) mainDictionary.get(code);
228    if (mainDictResult == null)
229      return new Vector();
230    return mainDictResult;
231  }
232
233  /**
234   * Returns true if the word is correctly spelled against the current word list.
235   */
236  public boolean isCorrect(String word) {
237    List possible = getWords(getCode(word));
238    if (possible.contains(word))
239      return true;
240    //JMH should we always try the lowercase version. If I dont then capitalised
241    //words are always returned as incorrect.
242    else if (possible.contains(word.toLowerCase()))
243      return true;
244    return false;
245  }
246}
Note: See TracBrowser for help on using the repository browser.