Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

DocumentWordTokenizer.java @ 3952

Revision 3952, 5.6 KB checked in by alexandrecorreia, 13 years ago (diff)
Ticket #1710 - Adicao do codigo fonte java do componente jmessenger(jabberit_messenger)
Property svn:executable set to *

Rev	Line
[3952]	1	package com.swabunga.spell.event;
	2
	3
	4	import javax.swing.text.BadLocationException;
	5	import javax.swing.text.Document;
	6	import javax.swing.text.Segment;
	7	import java.text.BreakIterator;
	8
	9
	10	/** This class tokenizes a swing document model. It also allows for the
	11	* document model to be changed when corrections occur.
	12	*
	13	* @author Jason Height (jheight@chariot.net.au)
	14	*/
	15	public class DocumentWordTokenizer implements WordTokenizer {
	16	/** Holds the start character position of the current word*/
	17	private int currentWordPos = 0;
	18	/** Holds the end character position of the current word*/
	19	private int currentWordEnd = 0;
	20	/** Holds the start character position of the next word*/
	21	private int nextWordPos = -1;
	22	/** The actual text that is being tokenized*/
	23	private Document document;
	24	/** The character iterator over the document*/
	25	private Segment text;
	26	/** The cumulative word count that have been processed*/
	27	private int wordCount = 0;
	28	/** Flag indicating if there are any more tokens (words) left*/
	29	private boolean moreTokens = true;
	30	/** Is this a special case where the currentWordStart, currntWordEnd and
	31	* nextWordPos have already been calculated. (see nextWord)
	32	*/
	33	private boolean first = true;
	34	private BreakIterator sentenceIterator;
	35	private boolean startsSentence = true;
	36
	37	public DocumentWordTokenizer(Document document) {
	38	this.document = document;
	39	//Create a text segment over the etire document
	40	text = new Segment();
	41	sentenceIterator = BreakIterator.getSentenceInstance();
	42	try {
	43	document.getText(0, document.getLength(), text);
	44	sentenceIterator.setText(text);
	45	currentWordPos = getNextWordStart(text, 0);
	46	//If the current word pos is -1 then the string was all white space
	47	if (currentWordPos != -1) {
	48	currentWordEnd = getNextWordEnd(text, currentWordPos);
	49	nextWordPos = getNextWordStart(text, currentWordEnd);
	50	} else {
	51	moreTokens = false;
	52	}
	53	} catch (BadLocationException ex) {
	54	moreTokens = false;
	55	}
	56	}
	57
	58	/** This helper method will return the start character of the next
	59	* word in the buffer from the start position
	60	*/
	61	private static int getNextWordStart(Segment text, int startPos) {
	62	if (startPos <= text.getEndIndex())
	63	for (char ch = text.setIndex(startPos); ch != Segment.DONE; ch = text.next()) {
	64	if (Character.isLetterOrDigit(ch)) {
	65	return text.getIndex();
	66	}
	67	}
	68	return -1;
	69	}
	70
	71	/** This helper method will return the end of the next word in the buffer.
	72	*
	73	*/
	74	private static int getNextWordEnd(Segment text, int startPos) {
	75	for (char ch = text.setIndex(startPos); ch != Segment.DONE; ch = text.next()) {
	76	if (!Character.isLetterOrDigit(ch)) {
	77	if (ch == '-' \|\| ch == '\'') { // handle ' and - inside words
	78	char ch2 = text.next();
	79	text.previous();
	80	if (ch2 != Segment.DONE && Character.isLetterOrDigit(ch2))
	81	continue;
	82	}
	83	return text.getIndex();
	84	}
	85	}
	86	return text.getEndIndex();
	87	}
	88
	89	/** Returns true if there are more words that can be processed in the string
	90	*
	91	*/
	92	public boolean hasMoreWords() {
	93	return moreTokens;
	94	}
	95
	96	/** Returns the current character position in the text
	97	*
	98	*/
	99	public int getCurrentWordPosition() {
	100	return currentWordPos;
	101	}
	102
	103	/** Returns the current end word position in the text
	104	*
	105	*/
	106	public int getCurrentWordEnd() {
	107	return currentWordEnd;
	108	}
	109
	110	/** Returns the next word in the text
	111	*
	112	*/
	113	public String nextWord() {
	114	if (!first) {
	115	currentWordPos = nextWordPos;
	116	currentWordEnd = getNextWordEnd(text, currentWordPos);
	117	nextWordPos = getNextWordStart(text, currentWordEnd + 1);
	118	}
	119	int current = sentenceIterator.current();
	120	if (current == currentWordPos)
	121	startsSentence = true;
	122	else {
	123	startsSentence = false;
	124	if (currentWordEnd > current)
	125	sentenceIterator.next();
	126	}
	127	//The nextWordPos has already been populated
	128	String word = null;
	129	try {
	130	word = document.getText(currentWordPos, currentWordEnd - currentWordPos);
	131	} catch (BadLocationException ex) {
	132	moreTokens = false;
	133	}
	134	wordCount++;
	135	first = false;
	136	if (nextWordPos == -1)
	137	moreTokens = false;
	138	return word;
	139	}
	140
	141	/** Returns the current number of words that have been processed
	142	*
	143	*/
	144	public int getCurrentWordCount() {
	145	return wordCount;
	146	}
	147
	148	/** Replaces the current word token*/
	149	public void replaceWord(String newWord) {
	150	if (currentWordPos != -1) {
	151	try {
	152	document.remove(currentWordPos, currentWordEnd - currentWordPos);
	153	document.insertString(currentWordPos, newWord, null);
	154	//Need to reset the segment
	155	document.getText(0, document.getLength(), text);
	156	} catch (BadLocationException ex) {
	157	throw new RuntimeException(ex.getMessage());
	158	}
	159	//Position after the newly replaced word(s)
	160	first = true;
	161	currentWordPos = getNextWordStart(text, currentWordPos + newWord.length());
	162	if (currentWordPos != -1) {
	163	currentWordEnd = getNextWordEnd(text, currentWordPos);
	164	nextWordPos = getNextWordStart(text, currentWordEnd);
	165	sentenceIterator.setText(text);
	166	sentenceIterator.following(currentWordPos);
	167	} else
	168	moreTokens = false;
	169	}
	170	}
	171
	172	/** Returns the current text that is being tokenized (includes any changes
	173	* that have been made)
	174	*/
	175	public String getContext() {
	176	return text.toString();
	177	}
	178
	179	/** Returns true if the current word is at the start of a sentence*/
	180	public boolean isNewSentence() {
	181	return startsSentence;
	182	}
	183	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: 3thparty/jmessenger/src/com/swabunga/spell/event/DocumentWordTokenizer.java @ 3952

Download in other formats: