[3952] | 1 | /* |
---|
| 2 | * put your module comment here |
---|
| 3 | * formatted with JxBeauty (c) johann.langhofer@nextra.at |
---|
| 4 | */ |
---|
| 5 | |
---|
| 6 | package com.swabunga.spell.engine; |
---|
| 7 | |
---|
| 8 | import java.io.*; |
---|
| 9 | import java.util.Hashtable; |
---|
| 10 | import java.util.List; |
---|
| 11 | import java.util.Vector; |
---|
| 12 | |
---|
| 13 | /** |
---|
| 14 | * The SpellDictionaryHashMap holds the dictionary |
---|
| 15 | * <p/> |
---|
| 16 | * This class is thread safe. Derived classes should ensure that this is preserved. |
---|
| 17 | * <p/> |
---|
| 18 | * There are many open source dictionary files. For just a few see: |
---|
| 19 | * http://wordlist.sourceforge.net/ |
---|
| 20 | * <p/> |
---|
| 21 | * This dictionary class reads words one per line. Make sure that your word list |
---|
| 22 | * is formatted in this way (most are). |
---|
| 23 | */ |
---|
| 24 | public class SpellDictionaryHashMap extends SpellDictionaryASpell { |
---|
| 25 | /** A field indicating the initial hash map capacity (16KB) for the main |
---|
| 26 | * dictionary hash map. Interested to see what the performance of a |
---|
| 27 | * smaller initial capacity is like. |
---|
| 28 | */ |
---|
| 29 | private final static int INITIAL_CAPACITY = 16 * 1024; |
---|
| 30 | |
---|
| 31 | /** |
---|
| 32 | * The hashmap that contains the word dictionary. The map is hashed on the doublemeta |
---|
| 33 | * code. The map entry contains a LinkedList of words that have the same double meta code. |
---|
| 34 | */ |
---|
| 35 | protected Hashtable mainDictionary = new Hashtable(INITIAL_CAPACITY); |
---|
| 36 | |
---|
| 37 | /** Holds the dictionary file for appending*/ |
---|
| 38 | private File dictFile = null; |
---|
| 39 | |
---|
| 40 | /** |
---|
| 41 | * Dictionary Constructor. |
---|
| 42 | */ |
---|
| 43 | public SpellDictionaryHashMap() throws IOException { |
---|
| 44 | super((File) null); |
---|
| 45 | } |
---|
| 46 | |
---|
| 47 | /** |
---|
| 48 | * Dictionary Constructor. |
---|
| 49 | */ |
---|
| 50 | public SpellDictionaryHashMap(Reader wordList) throws IOException { |
---|
| 51 | super((File) null); |
---|
| 52 | createDictionary(new BufferedReader(wordList)); |
---|
| 53 | } |
---|
| 54 | |
---|
| 55 | /** |
---|
| 56 | * Dictionary Convienence Constructor. |
---|
| 57 | */ |
---|
| 58 | public SpellDictionaryHashMap(File wordList) throws FileNotFoundException, IOException { |
---|
| 59 | this(new FileReader(wordList)); |
---|
| 60 | dictFile = wordList; |
---|
| 61 | } |
---|
| 62 | |
---|
| 63 | /** |
---|
| 64 | * Dictionary constructor that uses an aspell phonetic file to |
---|
| 65 | * build the transformation table. |
---|
| 66 | */ |
---|
| 67 | public SpellDictionaryHashMap(File wordList, File phonetic) throws FileNotFoundException, IOException { |
---|
| 68 | super(phonetic); |
---|
| 69 | dictFile = wordList; |
---|
| 70 | createDictionary(new BufferedReader(new FileReader(wordList))); |
---|
| 71 | } |
---|
| 72 | |
---|
| 73 | /** |
---|
| 74 | * Dictionary constructor that uses an aspell phonetic file to |
---|
| 75 | * build the transformation table. |
---|
| 76 | * encoding is used for phonetic file only; default encoding is used for wordList |
---|
| 77 | */ |
---|
| 78 | public SpellDictionaryHashMap(File wordList, File phonetic, String phoneticEncoding) throws FileNotFoundException, IOException { |
---|
| 79 | super(phonetic, phoneticEncoding); |
---|
| 80 | dictFile = wordList; |
---|
| 81 | createDictionary(new BufferedReader(new FileReader(wordList))); |
---|
| 82 | } |
---|
| 83 | |
---|
| 84 | /** |
---|
| 85 | * Dictionary constructor that uses an aspell phonetic file to |
---|
| 86 | * build the transformation table. |
---|
| 87 | */ |
---|
| 88 | public SpellDictionaryHashMap(Reader wordList, Reader phonetic) throws IOException { |
---|
| 89 | super(phonetic); |
---|
| 90 | dictFile = null; |
---|
| 91 | createDictionary(new BufferedReader(wordList)); |
---|
| 92 | } |
---|
| 93 | |
---|
| 94 | /** |
---|
| 95 | * Add words from a file to existing dictionary hashmap. |
---|
| 96 | * This function can be called as many times as needed to |
---|
| 97 | * build the internal word list. Duplicates are not added. |
---|
| 98 | * <p> |
---|
| 99 | * Note that adding a dictionary does not affect the target |
---|
| 100 | * dictionary file for the addWord method. That is, addWord() continues |
---|
| 101 | * to make additions to the dictionary file specified in createDictionary() |
---|
| 102 | * <P> |
---|
| 103 | * @param wordList a File object that contains the words, on word per line. |
---|
| 104 | * @throws FileNotFoundException |
---|
| 105 | * @throws IOException |
---|
| 106 | */ |
---|
| 107 | public void addDictionary(File wordList) throws FileNotFoundException, IOException { |
---|
| 108 | addDictionaryHelper(new BufferedReader(new FileReader(wordList))); |
---|
| 109 | } |
---|
| 110 | |
---|
| 111 | public void addDictionary(Reader wordList) throws IOException { |
---|
| 112 | addDictionaryHelper(new BufferedReader(wordList)); |
---|
| 113 | } |
---|
| 114 | |
---|
| 115 | /** |
---|
| 116 | * Add a word permanantly to the dictionary (and the dictionary file). |
---|
| 117 | * <p>This needs to be made thread safe (synchronized)</p> |
---|
| 118 | */ |
---|
| 119 | public void addWord(String word) { |
---|
| 120 | putWord(word); |
---|
| 121 | if (dictFile == null) |
---|
| 122 | return; |
---|
| 123 | try { |
---|
| 124 | FileWriter w = new FileWriter(dictFile.toString(), true); |
---|
| 125 | // Open with append. |
---|
| 126 | w.write(word); |
---|
| 127 | w.write("\n"); |
---|
| 128 | w.close(); |
---|
| 129 | } catch (IOException ex) { |
---|
| 130 | System.out.println("Error writing to dictionary file"); |
---|
| 131 | } |
---|
| 132 | } |
---|
| 133 | |
---|
| 134 | /** |
---|
| 135 | * Constructs the dictionary from a word list file. |
---|
| 136 | * <p> |
---|
| 137 | * Each word in the reader should be on a seperate line. |
---|
| 138 | * <p> |
---|
| 139 | * This is a very slow function. On my machine it takes quite a while to |
---|
| 140 | * load the data in. I suspect that we could speed this up quite alot. |
---|
| 141 | */ |
---|
| 142 | protected void createDictionary(BufferedReader in) throws IOException { |
---|
| 143 | String line = ""; |
---|
| 144 | while (line != null) { |
---|
| 145 | line = in.readLine(); |
---|
| 146 | if (line != null && line.length() > 0) { |
---|
| 147 | line = new String(line.toCharArray()); |
---|
| 148 | putWord(line); |
---|
| 149 | } |
---|
| 150 | } |
---|
| 151 | } |
---|
| 152 | |
---|
| 153 | /** |
---|
| 154 | * Adds to the existing dictionary from a word list file. If the word |
---|
| 155 | * already exists in the dictionary, a new entry is not added. |
---|
| 156 | * <p> |
---|
| 157 | * Each word in the reader should be on a seperate line. |
---|
| 158 | * <p> |
---|
| 159 | * Note: for whatever reason that I haven't yet looked into, the phonetic codes |
---|
| 160 | * for a particular word map to a vector of words rather than a hash table. |
---|
| 161 | * This is a drag since in order to check for duplicates you have to iterate |
---|
| 162 | * through all the words that use the phonetic code. |
---|
| 163 | * If the vector-based implementation is important, it may be better |
---|
| 164 | * to subclass for the cases where duplicates are bad. |
---|
| 165 | */ |
---|
| 166 | protected void addDictionaryHelper(BufferedReader in) throws IOException { |
---|
| 167 | |
---|
| 168 | String line = ""; |
---|
| 169 | while (line != null) { |
---|
| 170 | line = in.readLine(); |
---|
| 171 | if (line != null && line.length() > 0) { |
---|
| 172 | line = new String(line.toCharArray()); |
---|
| 173 | putWordUnique(line); |
---|
| 174 | } |
---|
| 175 | } |
---|
| 176 | } |
---|
| 177 | |
---|
| 178 | /** |
---|
| 179 | * Allocates a word in the dictionary |
---|
| 180 | */ |
---|
| 181 | protected void putWord(String word) { |
---|
| 182 | String code = getCode(word); |
---|
| 183 | Vector list = (Vector) mainDictionary.get(code); |
---|
| 184 | if (list != null) { |
---|
| 185 | list.addElement(word); |
---|
| 186 | } else { |
---|
| 187 | list = new Vector(); |
---|
| 188 | list.addElement(word); |
---|
| 189 | mainDictionary.put(code, list); |
---|
| 190 | } |
---|
| 191 | } |
---|
| 192 | |
---|
| 193 | protected void putWordUnique(String word) { |
---|
| 194 | |
---|
| 195 | String code = getCode(word); |
---|
| 196 | Vector list = (Vector) mainDictionary.get(code); |
---|
| 197 | |
---|
| 198 | if (list != null) { |
---|
| 199 | |
---|
| 200 | boolean isAlready = false; |
---|
| 201 | |
---|
| 202 | for (int i = 0; i < list.size(); i++) { |
---|
| 203 | |
---|
| 204 | if (word.equalsIgnoreCase((String) list.elementAt(i))) { |
---|
| 205 | isAlready = true; |
---|
| 206 | break; |
---|
| 207 | } |
---|
| 208 | } |
---|
| 209 | |
---|
| 210 | if (!isAlready) |
---|
| 211 | list.addElement(word); |
---|
| 212 | |
---|
| 213 | } else { |
---|
| 214 | |
---|
| 215 | list = new Vector(); |
---|
| 216 | list.addElement(word); |
---|
| 217 | mainDictionary.put(code, list); |
---|
| 218 | |
---|
| 219 | } |
---|
| 220 | } |
---|
| 221 | |
---|
| 222 | /** |
---|
| 223 | * Returns a list of strings (words) for the code. |
---|
| 224 | */ |
---|
| 225 | public List getWords(String code) { |
---|
| 226 | //Check the main dictionary. |
---|
| 227 | Vector mainDictResult = (Vector) mainDictionary.get(code); |
---|
| 228 | if (mainDictResult == null) |
---|
| 229 | return new Vector(); |
---|
| 230 | return mainDictResult; |
---|
| 231 | } |
---|
| 232 | |
---|
| 233 | /** |
---|
| 234 | * Returns true if the word is correctly spelled against the current word list. |
---|
| 235 | */ |
---|
| 236 | public boolean isCorrect(String word) { |
---|
| 237 | List possible = getWords(getCode(word)); |
---|
| 238 | if (possible.contains(word)) |
---|
| 239 | return true; |
---|
| 240 | //JMH should we always try the lowercase version. If I dont then capitalised |
---|
| 241 | //words are always returned as incorrect. |
---|
| 242 | else if (possible.contains(word.toLowerCase())) |
---|
| 243 | return true; |
---|
| 244 | return false; |
---|
| 245 | } |
---|
| 246 | } |
---|