source: 3thparty/jmessenger/src/com/swabunga/spell/event/DocumentWordTokenizer.java @ 3952

Revision 3952, 5.6 KB checked in by alexandrecorreia, 13 years ago (diff)

Ticket #1710 - Adicao do codigo fonte java do componente jmessenger(jabberit_messenger)

  • Property svn:executable set to *
Line 
1package com.swabunga.spell.event;
2
3
4import javax.swing.text.BadLocationException;
5import javax.swing.text.Document;
6import javax.swing.text.Segment;
7import java.text.BreakIterator;
8
9
10/** This class tokenizes a swing document model. It also allows for the
11 *  document model to be changed when corrections occur.
12 *
13 * @author Jason Height (jheight@chariot.net.au)
14 */
15public class DocumentWordTokenizer implements WordTokenizer {
16  /** Holds the start character position of the current word*/
17  private int currentWordPos = 0;
18  /** Holds the end character position of the current word*/
19  private int currentWordEnd = 0;
20  /** Holds the start character position of the next word*/
21  private int nextWordPos = -1;
22  /** The actual text that is being tokenized*/
23  private Document document;
24  /** The character iterator over the document*/
25  private Segment text;
26  /** The cumulative word count that have been processed*/
27  private int wordCount = 0;
28  /** Flag indicating if there are any more tokens (words) left*/
29  private boolean moreTokens = true;
30  /** Is this a special case where the currentWordStart, currntWordEnd and
31   *  nextWordPos have already been calculated. (see nextWord)
32   */
33  private boolean first = true;
34  private BreakIterator sentenceIterator;
35  private boolean startsSentence = true;
36
37  public DocumentWordTokenizer(Document document) {
38    this.document = document;
39    //Create a text segment over the etire document
40    text = new Segment();
41    sentenceIterator = BreakIterator.getSentenceInstance();
42    try {
43      document.getText(0, document.getLength(), text);
44      sentenceIterator.setText(text);
45      currentWordPos = getNextWordStart(text, 0);
46      //If the current word pos is -1 then the string was all white space
47      if (currentWordPos != -1) {
48        currentWordEnd = getNextWordEnd(text, currentWordPos);
49        nextWordPos = getNextWordStart(text, currentWordEnd);
50      } else {
51        moreTokens = false;
52      }
53    } catch (BadLocationException ex) {
54      moreTokens = false;
55    }
56  }
57
58  /** This helper method will return the start character of the next
59   * word in the buffer from the start position
60   */
61  private static int getNextWordStart(Segment text, int startPos) {
62    if (startPos <= text.getEndIndex())
63      for (char ch = text.setIndex(startPos); ch != Segment.DONE; ch = text.next()) {
64        if (Character.isLetterOrDigit(ch)) {
65          return text.getIndex();
66        }
67      }
68    return -1;
69  }
70
71  /** This helper method will return the end of the next word in the buffer.
72   *
73   */
74  private static int getNextWordEnd(Segment text, int startPos) {
75    for (char ch = text.setIndex(startPos); ch != Segment.DONE; ch = text.next()) {
76      if (!Character.isLetterOrDigit(ch)) {
77        if (ch == '-' || ch == '\'') { // handle ' and - inside words
78          char ch2 = text.next();
79          text.previous();
80          if (ch2 != Segment.DONE && Character.isLetterOrDigit(ch2))
81            continue;
82        }
83        return text.getIndex();
84      }
85    }
86    return text.getEndIndex();
87  }
88
89  /** Returns true if there are more words that can be processed in the string
90   *
91   */
92  public boolean hasMoreWords() {
93    return moreTokens;
94  }
95
96  /** Returns the current character position in the text
97   *
98   */
99  public int getCurrentWordPosition() {
100    return currentWordPos;
101  }
102
103  /** Returns the current end word position in the text
104   *
105   */
106  public int getCurrentWordEnd() {
107    return currentWordEnd;
108  }
109
110  /** Returns the next word in the text
111   *
112   */
113  public String nextWord() {
114    if (!first) {
115      currentWordPos = nextWordPos;
116      currentWordEnd = getNextWordEnd(text, currentWordPos);
117      nextWordPos = getNextWordStart(text, currentWordEnd + 1);
118    }
119    int current = sentenceIterator.current();
120    if (current == currentWordPos)
121      startsSentence = true;
122    else {
123      startsSentence = false;
124      if (currentWordEnd > current)
125        sentenceIterator.next();
126    }
127    //The nextWordPos has already been populated
128    String word = null;
129    try {
130      word = document.getText(currentWordPos, currentWordEnd - currentWordPos);
131    } catch (BadLocationException ex) {
132      moreTokens = false;
133    }
134    wordCount++;
135    first = false;
136    if (nextWordPos == -1)
137      moreTokens = false;
138    return word;
139  }
140
141  /** Returns the current number of words that have been processed
142   *
143   */
144  public int getCurrentWordCount() {
145    return wordCount;
146  }
147
148  /** Replaces the current word token*/
149  public void replaceWord(String newWord) {
150    if (currentWordPos != -1) {
151      try {
152        document.remove(currentWordPos, currentWordEnd - currentWordPos);
153        document.insertString(currentWordPos, newWord, null);
154        //Need to reset the segment
155        document.getText(0, document.getLength(), text);
156      } catch (BadLocationException ex) {
157        throw new RuntimeException(ex.getMessage());
158      }
159      //Position after the newly replaced word(s)
160      first = true;
161      currentWordPos = getNextWordStart(text, currentWordPos + newWord.length());
162      if (currentWordPos != -1) {
163        currentWordEnd = getNextWordEnd(text, currentWordPos);
164        nextWordPos = getNextWordStart(text, currentWordEnd);
165        sentenceIterator.setText(text);
166        sentenceIterator.following(currentWordPos);
167      } else
168        moreTokens = false;
169    }
170  }
171
172  /** Returns the current text that is being tokenized (includes any changes
173   *  that have been made)
174   */
175  public String getContext() {
176    return text.toString();
177  }
178
179  /** Returns true if the current word is at the start of a sentence*/
180  public boolean isNewSentence() {
181    return startsSentence;
182  }
183}
Note: See TracBrowser for help on using the repository browser.