package com.swabunga.spell.event;

/**
 * <p>An interface for objects which take a text-based media as input, and iterate through
 * the words in the text stored in that media. Examples of such media could be Strings,
 * Documents, Files, TextComponents etc.
 * </p>
 *
 * <p>
 * When the object is instantiated, and before the first call to <code>nextWord()</code> is made,
 * the following methods should throw a <code>WordNotFoundException</code>:<br>
 * <code>getCurrentWordEnd()</code>, <code>getCurrentWordPosition()</code>,
 * <code>isNewSentence()</code> and <code>replaceWord()</code>.
 * </p>
 *
 * <p>A call to <code>nextWord()</code> when <code>hasMoreWords()</code> returns false
 * should throw a <code>WordNotFoundException</code>.</p>
 *
 * <p>NOTE(review): none of the methods declare <code>WordNotFoundException</code> in a
 * <code>throws</code> clause, so it is presumably an unchecked exception - confirm against
 * its declaration.</p>
 *
 * @author Jason Height (jheight@chariot.net.au)
 */
public interface WordTokenizer {

  //~ Methods .................................................................

  /**
   * Returns the context text that is being tokenized (should include any
   * changes that have been made).
   *
   * @return the text being searched.
   */
  String getContext();

  /**
   * Returns the number of word tokens that have been processed thus far.
   *
   * @return the number of words found so far.
   */
  int getCurrentWordCount();

  /**
   * Returns an index representing the end location of the current word in the text.
   *
   * @return index of the end of the current word in the text.
   * @throws WordNotFoundException current word has not yet been set.
   */
  int getCurrentWordEnd();

  /**
   * Returns an index representing the start location of the current word in the text.
   *
   * @return index of the start of the current word in the text.
   * @throws WordNotFoundException current word has not yet been set.
   */
  int getCurrentWordPosition();

  /**
   * Returns true if the current word is at the start of a sentence.
   *
   * @return true if the current word starts a sentence.
   * @throws WordNotFoundException current word has not yet been set.
   */
  boolean isNewSentence();

  /**
   * Returns true if there are more words left.
   *
   * @return true if more words can be found in the text.
   */
  boolean hasMoreWords();

  /**
   * Returns the next word in the iteration. Note that any implementation should return
   * the current word, and then replace the current word with the next word found in the
   * input text (if one exists).
   *
   * @return the next word in the iteration.
   * @throws WordNotFoundException search string contains no more words.
   */
  String nextWord();

  /**
   * Replaces the current word token.
   *
   * <p>When a word is replaced, care should be taken that the WordTokenizer
   * repositions itself such that the words that were added aren't rechecked.
   * Of course this is not mandatory; maybe there is a case when an
   * application doesn't need to do this.</p>
   *
   * @param newWord the string which should replace the current word.
   * @throws WordNotFoundException current word has not yet been set.
   */
  void replaceWord(String newWord);
}