Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Annotate
Revision Log

DocumentWordTokenizer.java @ 3952

Revision 3952, 5.6 KB checked in by alexandrecorreia, 13 years ago (diff)
Ticket #1710 - Adicao do codigo fonte java do componente jmessenger(jabberit_messenger)
Property svn:executable set to ``*

Line
1	package com.swabunga.spell.event;
2
3
4	import javax.swing.text.BadLocationException;
5	import javax.swing.text.Document;
6	import javax.swing.text.Segment;
7	import java.text.BreakIterator;
8
9
10	/** This class tokenizes a swing document model. It also allows for the
11	* document model to be changed when corrections occur.
12	*
13	* @author Jason Height (jheight@chariot.net.au)
14	*/
15	public class DocumentWordTokenizer implements WordTokenizer {
16	/** Holds the start character position of the current word*/
17	private int currentWordPos = 0;
18	/** Holds the end character position of the current word*/
19	private int currentWordEnd = 0;
20	/** Holds the start character position of the next word*/
21	private int nextWordPos = -1;
22	/** The actual text that is being tokenized*/
23	private Document document;
24	/** The character iterator over the document*/
25	private Segment text;
26	/** The cumulative word count that have been processed*/
27	private int wordCount = 0;
28	/** Flag indicating if there are any more tokens (words) left*/
29	private boolean moreTokens = true;
30	/** Is this a special case where the currentWordStart, currntWordEnd and
31	* nextWordPos have already been calculated. (see nextWord)
32	*/
33	private boolean first = true;
34	private BreakIterator sentenceIterator;
35	private boolean startsSentence = true;
36
37	public DocumentWordTokenizer(Document document) {
38	this.document = document;
39	//Create a text segment over the etire document
40	text = new Segment();
41	sentenceIterator = BreakIterator.getSentenceInstance();
42	try {
43	document.getText(0, document.getLength(), text);
44	sentenceIterator.setText(text);
45	currentWordPos = getNextWordStart(text, 0);
46	//If the current word pos is -1 then the string was all white space
47	if (currentWordPos != -1) {
48	currentWordEnd = getNextWordEnd(text, currentWordPos);
49	nextWordPos = getNextWordStart(text, currentWordEnd);
50	} else {
51	moreTokens = false;
52	}
53	} catch (BadLocationException ex) {
54	moreTokens = false;
55	}
56	}
57
58	/** This helper method will return the start character of the next
59	* word in the buffer from the start position
60	*/
61	private static int getNextWordStart(Segment text, int startPos) {
62	if (startPos <= text.getEndIndex())
63	for (char ch = text.setIndex(startPos); ch != Segment.DONE; ch = text.next()) {
64	if (Character.isLetterOrDigit(ch)) {
65	return text.getIndex();
66	}
67	}
68	return -1;
69	}
70
71	/** This helper method will return the end of the next word in the buffer.
72	*
73	*/
74	private static int getNextWordEnd(Segment text, int startPos) {
75	for (char ch = text.setIndex(startPos); ch != Segment.DONE; ch = text.next()) {
76	if (!Character.isLetterOrDigit(ch)) {
77	if (ch == '-' \|\| ch == '\'') { // handle ' and - inside words
78	char ch2 = text.next();
79	text.previous();
80	if (ch2 != Segment.DONE && Character.isLetterOrDigit(ch2))
81	continue;
82	}
83	return text.getIndex();
84	}
85	}
86	return text.getEndIndex();
87	}
88
89	/** Returns true if there are more words that can be processed in the string
90	*
91	*/
92	public boolean hasMoreWords() {
93	return moreTokens;
94	}
95
96	/** Returns the current character position in the text
97	*
98	*/
99	public int getCurrentWordPosition() {
100	return currentWordPos;
101	}
102
103	/** Returns the current end word position in the text
104	*
105	*/
106	public int getCurrentWordEnd() {
107	return currentWordEnd;
108	}
109
110	/** Returns the next word in the text
111	*
112	*/
113	public String nextWord() {
114	if (!first) {
115	currentWordPos = nextWordPos;
116	currentWordEnd = getNextWordEnd(text, currentWordPos);
117	nextWordPos = getNextWordStart(text, currentWordEnd + 1);
118	}
119	int current = sentenceIterator.current();
120	if (current == currentWordPos)
121	startsSentence = true;
122	else {
123	startsSentence = false;
124	if (currentWordEnd > current)
125	sentenceIterator.next();
126	}
127	//The nextWordPos has already been populated
128	String word = null;
129	try {
130	word = document.getText(currentWordPos, currentWordEnd - currentWordPos);
131	} catch (BadLocationException ex) {
132	moreTokens = false;
133	}
134	wordCount++;
135	first = false;
136	if (nextWordPos == -1)
137	moreTokens = false;
138	return word;
139	}
140
141	/** Returns the current number of words that have been processed
142	*
143	*/
144	public int getCurrentWordCount() {
145	return wordCount;
146	}
147
148	/** Replaces the current word token*/
149	public void replaceWord(String newWord) {
150	if (currentWordPos != -1) {
151	try {
152	document.remove(currentWordPos, currentWordEnd - currentWordPos);
153	document.insertString(currentWordPos, newWord, null);
154	//Need to reset the segment
155	document.getText(0, document.getLength(), text);
156	} catch (BadLocationException ex) {
157	throw new RuntimeException(ex.getMessage());
158	}
159	//Position after the newly replaced word(s)
160	first = true;
161	currentWordPos = getNextWordStart(text, currentWordPos + newWord.length());
162	if (currentWordPos != -1) {
163	currentWordEnd = getNextWordEnd(text, currentWordPos);
164	nextWordPos = getNextWordStart(text, currentWordEnd);
165	sentenceIterator.setText(text);
166	sentenceIterator.following(currentWordPos);
167	} else
168	moreTokens = false;
169	}
170	}
171
172	/** Returns the current text that is being tokenized (includes any changes
173	* that have been made)
174	*/
175	public String getContext() {
176	return text.toString();
177	}
178
179	/** Returns true if the current word is at the start of a sentence*/
180	public boolean isNewSentence() {
181	return startsSentence;
182	}
183	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: 3thparty/jmessenger/src/com/swabunga/spell/event/DocumentWordTokenizer.java @ 3952

Download in other formats: