/*
 * SpellDictionaryHashMap: an in-memory spell-checking dictionary whose words
 * are grouped by phonetic code.
 * formatted with JxBeauty (c) johann.langhofer@nextra.at
 */
5 | |
---|
6 | package com.swabunga.spell.engine; |
---|
7 | |
---|
8 | import java.io.*; |
---|
9 | import java.util.Hashtable; |
---|
10 | import java.util.List; |
---|
11 | import java.util.Vector; |
---|
12 | |
---|
13 | /** |
---|
14 | * The SpellDictionaryHashMap holds the dictionary |
---|
15 | * <p/> |
---|
16 | * This class is thread safe. Derived classes should ensure that this preserved. |
---|
17 | * <p/> |
---|
18 | * There are many open source dictionary files. For just a few see: |
---|
19 | * http://wordlist.sourceforge.net/ |
---|
20 | * <p/> |
---|
21 | * This dictionary class reads words one per line. Make sure that your word list |
---|
22 | * is formatted in this way (most are). |
---|
23 | */ |
---|
24 | public class SpellDictionaryHashMap extends SpellDictionaryASpell { |
---|
25 | /** A field indicating the initial hash map capacity (16KB) for the main |
---|
26 | * dictionary hash map. Interested to see what the performance of a |
---|
27 | * smaller initial capacity is like. |
---|
28 | */ |
---|
29 | private final static int INITIAL_CAPACITY = 16 * 1024; |
---|
30 | |
---|
31 | /** |
---|
32 | * The hashmap that contains the word dictionary. The map is hashed on the doublemeta |
---|
33 | * code. The map entry contains a LinkedList of words that have the same double meta code. |
---|
34 | */ |
---|
35 | protected Hashtable mainDictionary = new Hashtable(INITIAL_CAPACITY); |
---|
36 | |
---|
37 | /** Holds the dictionary file for appending*/ |
---|
38 | private File dictFile = null; |
---|
39 | |
---|
40 | /** |
---|
41 | * Dictionary Constructor. |
---|
42 | */ |
---|
43 | public SpellDictionaryHashMap() throws IOException { |
---|
44 | super((File) null); |
---|
45 | } |
---|
46 | |
---|
47 | /** |
---|
48 | * Dictionary Constructor. |
---|
49 | */ |
---|
50 | public SpellDictionaryHashMap(Reader wordList) throws IOException { |
---|
51 | super((File) null); |
---|
52 | createDictionary(new BufferedReader(wordList)); |
---|
53 | } |
---|
54 | |
---|
55 | /** |
---|
56 | * Dictionary Convienence Constructor. |
---|
57 | */ |
---|
58 | public SpellDictionaryHashMap(File wordList) throws FileNotFoundException, IOException { |
---|
59 | this(new FileReader(wordList)); |
---|
60 | dictFile = wordList; |
---|
61 | } |
---|
62 | |
---|
63 | /** |
---|
64 | * Dictionary constructor that uses an aspell phonetic file to |
---|
65 | * build the transformation table. |
---|
66 | */ |
---|
67 | public SpellDictionaryHashMap(File wordList, File phonetic) throws FileNotFoundException, IOException { |
---|
68 | super(phonetic); |
---|
69 | dictFile = wordList; |
---|
70 | createDictionary(new BufferedReader(new FileReader(wordList))); |
---|
71 | } |
---|
72 | |
---|
73 | /** |
---|
74 | * Dictionary constructor that uses an aspell phonetic file to |
---|
75 | * build the transformation table. |
---|
76 | * encoding is used for phonetic file only; default encoding is used for wordList |
---|
77 | */ |
---|
78 | public SpellDictionaryHashMap(File wordList, File phonetic, String phoneticEncoding) throws FileNotFoundException, IOException { |
---|
79 | super(phonetic, phoneticEncoding); |
---|
80 | dictFile = wordList; |
---|
81 | createDictionary(new BufferedReader(new FileReader(wordList))); |
---|
82 | } |
---|
83 | |
---|
84 | /** |
---|
85 | * Dictionary constructor that uses an aspell phonetic file to |
---|
86 | * build the transformation table. |
---|
87 | */ |
---|
88 | public SpellDictionaryHashMap(Reader wordList, Reader phonetic) throws IOException { |
---|
89 | super(phonetic); |
---|
90 | dictFile = null; |
---|
91 | createDictionary(new BufferedReader(wordList)); |
---|
92 | } |
---|
93 | |
---|
94 | /** |
---|
95 | * Add words from a file to existing dictionary hashmap. |
---|
96 | * This function can be called as many times as needed to |
---|
97 | * build the internal word list. Duplicates are not added. |
---|
98 | * <p> |
---|
99 | * Note that adding a dictionary does not affect the target |
---|
100 | * dictionary file for the addWord method. That is, addWord() continues |
---|
101 | * to make additions to the dictionary file specified in createDictionary() |
---|
102 | * <P> |
---|
103 | * @param wordList a File object that contains the words, on word per line. |
---|
104 | * @throws FileNotFoundException |
---|
105 | * @throws IOException |
---|
106 | */ |
---|
107 | public void addDictionary(File wordList) throws FileNotFoundException, IOException { |
---|
108 | addDictionaryHelper(new BufferedReader(new FileReader(wordList))); |
---|
109 | } |
---|
110 | |
---|
111 | public void addDictionary(Reader wordList) throws IOException { |
---|
112 | addDictionaryHelper(new BufferedReader(wordList)); |
---|
113 | } |
---|
114 | |
---|
115 | /** |
---|
116 | * Add a word permanantly to the dictionary (and the dictionary file). |
---|
117 | * <p>This needs to be made thread safe (synchronized)</p> |
---|
118 | */ |
---|
119 | public void addWord(String word) { |
---|
120 | putWord(word); |
---|
121 | if (dictFile == null) |
---|
122 | return; |
---|
123 | try { |
---|
124 | FileWriter w = new FileWriter(dictFile.toString(), true); |
---|
125 | // Open with append. |
---|
126 | w.write(word); |
---|
127 | w.write("\n"); |
---|
128 | w.close(); |
---|
129 | } catch (IOException ex) { |
---|
130 | System.out.println("Error writing to dictionary file"); |
---|
131 | } |
---|
132 | } |
---|
133 | |
---|
134 | /** |
---|
135 | * Constructs the dictionary from a word list file. |
---|
136 | * <p> |
---|
137 | * Each word in the reader should be on a seperate line. |
---|
138 | * <p> |
---|
139 | * This is a very slow function. On my machine it takes quite a while to |
---|
140 | * load the data in. I suspect that we could speed this up quite alot. |
---|
141 | */ |
---|
142 | protected void createDictionary(BufferedReader in) throws IOException { |
---|
143 | String line = ""; |
---|
144 | while (line != null) { |
---|
145 | line = in.readLine(); |
---|
146 | if (line != null && line.length() > 0) { |
---|
147 | line = new String(line.toCharArray()); |
---|
148 | putWord(line); |
---|
149 | } |
---|
150 | } |
---|
151 | } |
---|
152 | |
---|
153 | /** |
---|
154 | * Adds to the existing dictionary from a word list file. If the word |
---|
155 | * already exists in the dictionary, a new entry is not added. |
---|
156 | * <p> |
---|
157 | * Each word in the reader should be on a seperate line. |
---|
158 | * <p> |
---|
159 | * Note: for whatever reason that I haven't yet looked into, the phonetic codes |
---|
160 | * for a particular word map to a vector of words rather than a hash table. |
---|
161 | * This is a drag since in order to check for duplicates you have to iterate |
---|
162 | * through all the words that use the phonetic code. |
---|
163 | * If the vector-based implementation is important, it may be better |
---|
164 | * to subclass for the cases where duplicates are bad. |
---|
165 | */ |
---|
166 | protected void addDictionaryHelper(BufferedReader in) throws IOException { |
---|
167 | |
---|
168 | String line = ""; |
---|
169 | while (line != null) { |
---|
170 | line = in.readLine(); |
---|
171 | if (line != null && line.length() > 0) { |
---|
172 | line = new String(line.toCharArray()); |
---|
173 | putWordUnique(line); |
---|
174 | } |
---|
175 | } |
---|
176 | } |
---|
177 | |
---|
178 | /** |
---|
179 | * Allocates a word in the dictionary |
---|
180 | */ |
---|
181 | protected void putWord(String word) { |
---|
182 | String code = getCode(word); |
---|
183 | Vector list = (Vector) mainDictionary.get(code); |
---|
184 | if (list != null) { |
---|
185 | list.addElement(word); |
---|
186 | } else { |
---|
187 | list = new Vector(); |
---|
188 | list.addElement(word); |
---|
189 | mainDictionary.put(code, list); |
---|
190 | } |
---|
191 | } |
---|
192 | |
---|
193 | protected void putWordUnique(String word) { |
---|
194 | |
---|
195 | String code = getCode(word); |
---|
196 | Vector list = (Vector) mainDictionary.get(code); |
---|
197 | |
---|
198 | if (list != null) { |
---|
199 | |
---|
200 | boolean isAlready = false; |
---|
201 | |
---|
202 | for (int i = 0; i < list.size(); i++) { |
---|
203 | |
---|
204 | if (word.equalsIgnoreCase((String) list.elementAt(i))) { |
---|
205 | isAlready = true; |
---|
206 | break; |
---|
207 | } |
---|
208 | } |
---|
209 | |
---|
210 | if (!isAlready) |
---|
211 | list.addElement(word); |
---|
212 | |
---|
213 | } else { |
---|
214 | |
---|
215 | list = new Vector(); |
---|
216 | list.addElement(word); |
---|
217 | mainDictionary.put(code, list); |
---|
218 | |
---|
219 | } |
---|
220 | } |
---|
221 | |
---|
222 | /** |
---|
223 | * Returns a list of strings (words) for the code. |
---|
224 | */ |
---|
225 | public List getWords(String code) { |
---|
226 | //Check the main dictionary. |
---|
227 | Vector mainDictResult = (Vector) mainDictionary.get(code); |
---|
228 | if (mainDictResult == null) |
---|
229 | return new Vector(); |
---|
230 | return mainDictResult; |
---|
231 | } |
---|
232 | |
---|
233 | /** |
---|
234 | * Returns true if the word is correctly spelled against the current word list. |
---|
235 | */ |
---|
236 | public boolean isCorrect(String word) { |
---|
237 | List possible = getWords(getCode(word)); |
---|
238 | if (possible.contains(word)) |
---|
239 | return true; |
---|
240 | //JMH should we always try the lowercase version. If I dont then capitalised |
---|
241 | //words are always returned as incorrect. |
---|
242 | else if (possible.contains(word.toLowerCase())) |
---|
243 | return true; |
---|
244 | return false; |
---|
245 | } |
---|
246 | } |
---|