source: branches/2.2/jabberit_messenger/java_source/src/com/swabunga/spell/engine/SpellDictionaryDichoDisk.java @ 3102

Revision 3102, 5.1 KB checked in by amuller, 14 years ago (diff)

Ticket #986 - Efetuado merge para o Branch 2.2( atualizacao do modulo)

  • Property svn:executable set to *
Line 
1package com.swabunga.spell.engine;
2
3import java.io.*;
4import java.util.*;
5
6/**
7 * Another implementation of <code>SpellDictionary</code> that doesn't cache any words in memory. Avoids the huge
8 * footprint of <code>SpellDictionaryHashMap</code> at the cost of relatively minor latency. A future version
9 * of this class that implements some caching strategies might be a good idea in the future, if there's any
10 * demand for it.
11 *
12 * This implementation requires a special dictionary file, with "code*word" lines sorted by code.
13 * It's using a dichotomy algorithm to search for words in the dictionary
14 *
15 * @author Damien Guillaume
16 * @version 0.1
17 */
18public class SpellDictionaryDichoDisk extends SpellDictionaryASpell {
19
20  /** Holds the dictionary file for reading*/
21  private RandomAccessFile dictFile = null;
22 
23  /** dictionary and phonetic file encoding */
24  private String encoding = null;
25
26  /**
27   * Dictionary Convienence Constructor.
28   */
29  public SpellDictionaryDichoDisk(File wordList)
30    throws FileNotFoundException, IOException {
31    super((File) null);
32    dictFile = new RandomAccessFile(wordList, "r");
33  }
34
35  /**
36   * Dictionary Convienence Constructor.
37   */
38  public SpellDictionaryDichoDisk(File wordList, String encoding)
39    throws FileNotFoundException, IOException {
40    super((File) null);
41    this.encoding = encoding;
42    dictFile = new RandomAccessFile(wordList, "r");
43  }
44
45  /**
46  * Dictionary constructor that uses an aspell phonetic file to
47  * build the transformation table.
48  */
49  public SpellDictionaryDichoDisk(File wordList, File phonetic)
50    throws FileNotFoundException, IOException {
51    super(phonetic);
52    dictFile = new RandomAccessFile(wordList, "r");
53  }
54 
55  /**
56  * Dictionary constructor that uses an aspell phonetic file to
57  * build the transformation table.
58  */
59  public SpellDictionaryDichoDisk(File wordList, File phonetic, String encoding)
60    throws FileNotFoundException, IOException {
61    super(phonetic, encoding);
62    this.encoding = encoding;
63    dictFile = new RandomAccessFile(wordList, "r");
64  }
65 
66  /**
67   * Add a word permanantly to the dictionary (and the dictionary file).
68   * <i>not implemented !</i>
69   */
70  public void addWord(String word) {
71    System.err.println("error: addWord is not implemented for SpellDictionaryDichoDisk");
72  }
73
74  /**
75    * Search the dictionary file for the words corresponding to the code
76    * within positions p1 - p2
77    */
78   private LinkedList dichoFind(String code, long p1, long p2) throws IOException {
79     //System.out.println("dichoFind("+code+","+p1+","+p2+")");
80     long pm = (p1 + p2) / 2;
81    dictFile.seek(pm);
82    String l;
83    if (encoding == null)
84      l = dictFile.readLine();
85    else
86      l = dictReadLine();
87    pm = dictFile.getFilePointer();
88    if (encoding == null)
89      l = dictFile.readLine();
90    else
91      l = dictReadLine();
92    long pm2 = dictFile.getFilePointer();
93    if (pm2 >= p2)
94      return(seqFind(code, p1, p2));
95    int istar = l.indexOf('*');
96    if (istar == -1)
97      throw new IOException("bad format: no * !");
98    String testcode = l.substring(0, istar);
99    int comp = code.compareTo(testcode);
100    if (comp < 0)
101      return(dichoFind(code, p1, pm-1));
102    else if (comp > 0)
103      return(dichoFind(code, pm2, p2));
104    else {
105      LinkedList l1 = dichoFind(code, p1, pm-1);
106      LinkedList l2 = dichoFind(code, pm2, p2);
107      String word = l.substring(istar+1);
108      l1.add(word);
109      l1.addAll(l2);
110      return(l1);
111    }
112   }
113   
114   private LinkedList seqFind(String code, long p1, long p2) throws IOException {
115     //System.out.println("seqFind("+code+","+p1+","+p2+")");
116     LinkedList list = new LinkedList();
117    dictFile.seek(p1);
118    while (dictFile.getFilePointer() < p2) {
119      String l;
120      if (encoding == null)
121        l = dictFile.readLine();
122      else
123        l = dictReadLine();
124      int istar = l.indexOf('*');
125      if (istar == -1)
126        throw new IOException("bad format: no * !");
127      String testcode = l.substring(0, istar);
128      if (testcode.equals(code)) {
129        String word = l.substring(istar+1);
130        list.add(word);
131      }
132    }
133    return(list);
134   }
135   
136   /**
137     * Read a line of dictFile with a specific encoding
138     */
139   private String dictReadLine() throws IOException {
140     int max = 255;
141     byte b=0;
142    byte[] buf = new byte[max];
143    int i=0;
144     try {
145       for (; b != '\n' && b != '\r' && i<max-1; i++) {
146        b = dictFile.readByte();
147         buf[i] = b;
148      }
149    } catch (EOFException ex) {
150    }
151    if (i == 0)
152      return("");
153    String s = new String(buf, 0, i-1, encoding);
154    return(s);
155   }
156   
157  /**
158   * Returns a list of strings (words) for the code.
159   */
160  public List getWords(String code) {
161     //System.out.println("getWords("+code+")");
162    LinkedList list;
163    try {
164      list = dichoFind(code, 0, dictFile.length()-1);
165      //System.out.println(list);
166    } catch (IOException ex) {
167      System.err.println("IOException: " + ex.getMessage());
168      list = new LinkedList();
169    }
170    return list;
171  }
172
173}
Note: See TracBrowser for help on using the repository browser.