1 | /** |
---|
2 | * MailArchiver is an application that provides services for storing and managing e-mail messages through a Web Services SOAP interface. |
---|
3 | * Copyright (C) 2012 Marcio Andre Scholl Levien and Fernando Alberto Reuter Wendt and Jose Ronaldo Nogueira Fonseca Junior |
---|
4 | * |
---|
5 | * This program is free software: you can redistribute it and/or modify |
---|
6 | * it under the terms of the GNU Affero General Public License as |
---|
7 | * published by the Free Software Foundation, either version 3 of the |
---|
8 | * License, or (at your option) any later version. |
---|
9 | * |
---|
10 | * This program is distributed in the hope that it will be useful, |
---|
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
13 | * GNU Affero General Public License for more details. |
---|
14 | * |
---|
15 | * You should have received a copy of the GNU Affero General Public License |
---|
16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
17 | */ |
---|
18 | |
---|
19 | /******************************************************************************\ |
---|
20 | * |
---|
21 | * This product was developed by |
---|
22 | * |
---|
23 | * SERVIÇO FEDERAL DE PROCESSAMENTO DE DADOS (SERPRO), |
---|
24 | * |
---|
25 | * a government company established under Brazilian law (5.615/70), |
---|
26 | * at Department of Development of Porto Alegre. |
---|
27 | * |
---|
28 | \******************************************************************************/ |
---|
29 | |
---|
30 | package serpro.mailarchiver.util; |
---|
31 | |
---|
32 | import java.io.Closeable; |
---|
33 | import java.io.IOException; |
---|
34 | import java.nio.file.Path; |
---|
35 | import java.util.ArrayList; |
---|
36 | import java.util.HashMap; |
---|
37 | import java.util.List; |
---|
38 | import java.util.Map; |
---|
39 | import java.util.Timer; |
---|
40 | import java.util.TimerTask; |
---|
41 | |
---|
42 | import org.apache.lucene.analysis.Analyzer; |
---|
43 | import org.apache.lucene.analysis.br.BrazilianAnalyzer; |
---|
44 | import org.apache.lucene.document.Document; |
---|
45 | import org.apache.lucene.index.IndexReader; |
---|
46 | import org.apache.lucene.index.IndexWriter; |
---|
47 | import org.apache.lucene.index.IndexWriterConfig; |
---|
48 | import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
---|
49 | import org.apache.lucene.index.Term; |
---|
50 | import org.apache.lucene.index.TieredMergePolicy; |
---|
51 | import org.apache.lucene.queryParser.ParseException; |
---|
52 | import org.apache.lucene.queryParser.QueryParser; |
---|
53 | import org.apache.lucene.search.IndexSearcher; |
---|
54 | import org.apache.lucene.search.Query; |
---|
55 | import org.apache.lucene.search.ScoreDoc; |
---|
56 | import org.apache.lucene.search.Sort; |
---|
57 | import org.apache.lucene.search.SortField; |
---|
58 | import org.apache.lucene.search.TopDocs; |
---|
59 | import org.apache.lucene.store.Directory; |
---|
60 | import org.apache.lucene.store.FSDirectory; |
---|
61 | import org.apache.lucene.util.Version; |
---|
62 | |
---|
63 | import org.springframework.beans.factory.annotation.Autowired; |
---|
64 | import org.springframework.beans.factory.annotation.Configurable; |
---|
65 | |
---|
66 | @Configurable(preConstruction=true) |
---|
67 | public class LuceneIndex { |
---|
68 | |
---|
69 | private static final Logger log = Logger.getLocalLogger(); |
---|
70 | |
---|
71 | public enum Order { |
---|
72 | DateAsc ("date", SortField.STRING, false), |
---|
73 | DateDesc ("date", SortField.STRING, true), |
---|
74 | SubjectAsc ("subject", SortField.STRING, false), |
---|
75 | SubjectDesc ("subject", SortField.STRING, true), |
---|
76 | FromAsc ("from", SortField.STRING, false), |
---|
77 | FromDesc ("from", SortField.STRING, true), |
---|
78 | FromMboxAsc ("from_mbox", SortField.STRING, false), |
---|
79 | FromMboxDesc ("from_mbox", SortField.STRING, true), |
---|
80 | SenderAsc ("sender", SortField.STRING, false), |
---|
81 | SenderDesc ("sender", SortField.STRING, true), |
---|
82 | SenderMboxAsc ("sender_mbox", SortField.STRING, false), |
---|
83 | SenderMboxDesc ("sender_mbox", SortField.STRING, true), |
---|
84 | ToAsc ("to", SortField.STRING, false), |
---|
85 | ToDesc ("to", SortField.STRING, true), |
---|
86 | ToMboxAsc ("to_mbox", SortField.STRING, false), |
---|
87 | ToMboxDesc ("to_mbox", SortField.STRING, true), |
---|
88 | CcAsc ("cc", SortField.STRING, false), |
---|
89 | CcDesc ("cc", SortField.STRING, true), |
---|
90 | CcMboxAsc ("cc_mbox", SortField.STRING, false), |
---|
91 | CcMboxDesc ("cc_mbox", SortField.STRING, true), |
---|
92 | BccAsc ("bcc", SortField.STRING, false), |
---|
93 | BccDesc ("bcc", SortField.STRING, true), |
---|
94 | BccMboxAsc ("bcc_mbox", SortField.STRING, false), |
---|
95 | BccMboxDesc ("bcc_mbox", SortField.STRING, true); |
---|
96 | |
---|
97 | private final SortField sortField; |
---|
98 | |
---|
99 | Order(String field, int type, boolean reverse) { |
---|
100 | sortField = new SortField(field, type, reverse); |
---|
101 | } |
---|
102 | } |
---|
103 | |
---|
104 | private Directory directory; |
---|
105 | private Analyzer analyzer; |
---|
106 | private TieredMergePolicy mergePolicy; |
---|
107 | private IndexWriterConfig writerConfig; |
---|
108 | private IndexWriter writer; |
---|
109 | |
---|
110 | private static final long MAX_INACTIVITY_INTERVAL = 300000; |
---|
111 | private final Timer closeTimer; |
---|
112 | private TimerTask closeTask; |
---|
113 | private long lastRequestTime; |
---|
114 | |
---|
115 | public synchronized void addMessage(Document doc) throws IOException { |
---|
116 | |
---|
117 | insureOpenness(); |
---|
118 | |
---|
119 | try { |
---|
120 | writer.addDocument(doc); |
---|
121 | writer.commit(); |
---|
122 | } |
---|
123 | catch(OutOfMemoryError ex) { |
---|
124 | close(); |
---|
125 | log.error(ex); |
---|
126 | throw new IOException(ex); |
---|
127 | } |
---|
128 | } |
---|
129 | |
---|
130 | public synchronized void deleteMessage(String messageId) throws IOException { |
---|
131 | |
---|
132 | insureOpenness(); |
---|
133 | |
---|
134 | Term term = new Term("message_id", messageId); |
---|
135 | |
---|
136 | try { |
---|
137 | writer.deleteDocuments(term); |
---|
138 | writer.commit(); |
---|
139 | } |
---|
140 | catch(OutOfMemoryError ex) { |
---|
141 | close(); |
---|
142 | log.error(ex); |
---|
143 | throw new IOException(ex); |
---|
144 | } |
---|
145 | } |
---|
146 | |
---|
147 | public synchronized String[] search(String queryExpression, Order... sortOrder) throws IOException { |
---|
148 | |
---|
149 | insureOpenness(); |
---|
150 | |
---|
151 | IndexReader reader = null; |
---|
152 | IndexSearcher searcher = null; |
---|
153 | |
---|
154 | try { |
---|
155 | reader = IndexReader.open(writer, true); |
---|
156 | searcher = new IndexSearcher(reader); |
---|
157 | |
---|
158 | QueryParser parser = new QueryParser(Version.LUCENE_35, "body", analyzer); |
---|
159 | //ComplexPhraseQueryParser parser = new ComplexPhraseQueryParser(Version.LUCENE_35, "body", analyzer); |
---|
160 | parser.setAllowLeadingWildcard(true); |
---|
161 | |
---|
162 | try { |
---|
163 | Query query = parser.parse(queryExpression); |
---|
164 | |
---|
165 | List<SortField> sortFields = new ArrayList<SortField>(); |
---|
166 | for(Order order : sortOrder) { |
---|
167 | if( ! sortFields.contains(order.sortField)) { |
---|
168 | sortFields.add(order.sortField); |
---|
169 | } |
---|
170 | } |
---|
171 | |
---|
172 | TopDocs results; |
---|
173 | |
---|
174 | if(sortFields.size() > 0) { |
---|
175 | Sort sort = new Sort(); |
---|
176 | sort.setSort(sortFields.toArray(new SortField[sortFields.size()])); |
---|
177 | results = searcher.search(query, Integer.MAX_VALUE, sort); |
---|
178 | } |
---|
179 | else { |
---|
180 | results = searcher.search(query, Integer.MAX_VALUE); |
---|
181 | } |
---|
182 | |
---|
183 | ScoreDoc[] hits = results.scoreDocs; |
---|
184 | |
---|
185 | String[] oids = new String[hits.length]; |
---|
186 | |
---|
187 | for(int i = 0; i < hits.length; i++) { |
---|
188 | Document doc = searcher.doc(hits[i].doc); |
---|
189 | oids[i] = doc.get("message_id"); |
---|
190 | } |
---|
191 | |
---|
192 | return oids; |
---|
193 | } |
---|
194 | catch(ParseException ex) { |
---|
195 | log.error(ex); |
---|
196 | return new String[]{}; |
---|
197 | } |
---|
198 | } |
---|
199 | finally { |
---|
200 | close(searcher); |
---|
201 | close(reader); |
---|
202 | } |
---|
203 | } |
---|
204 | |
---|
205 | //-------------------------------------------------------------------------- |
---|
206 | @Autowired |
---|
207 | private UserAppConfig userAppConfig; |
---|
208 | |
---|
209 | private final Path absolutePath; |
---|
210 | |
---|
211 | public final Path getAbsolutePath() { |
---|
212 | return absolutePath; |
---|
213 | } |
---|
214 | |
---|
215 | private static final Map<String, LuceneIndex> instances = new HashMap<String, LuceneIndex>(); |
---|
216 | |
---|
217 | public static LuceneIndex getInstance(String userId) { |
---|
218 | synchronized (instances) { |
---|
219 | LuceneIndex instance = instances.get(userId); |
---|
220 | if (instance == null) { |
---|
221 | instance = new LuceneIndex(userId); |
---|
222 | instances.put(userId, instance); |
---|
223 | } |
---|
224 | return instance; |
---|
225 | } |
---|
226 | } |
---|
227 | |
---|
228 | protected LuceneIndex(String userId) { |
---|
229 | absolutePath = userAppConfig.SERVER.getArchiveDir() |
---|
230 | .resolve("index") |
---|
231 | .resolve(userId); |
---|
232 | |
---|
233 | closeTimer = new Timer("lucene-close-timer-" + userId, true); |
---|
234 | } |
---|
235 | |
---|
236 | private synchronized void insureOpenness() throws IOException { |
---|
237 | |
---|
238 | lastRequestTime = System.currentTimeMillis(); |
---|
239 | |
---|
240 | if(writer == null) { |
---|
241 | |
---|
242 | close(); |
---|
243 | |
---|
244 | System.out.println("Opening Lucene index"); |
---|
245 | |
---|
246 | directory = FSDirectory.open(absolutePath.toFile()); |
---|
247 | |
---|
248 | if(IndexWriter.isLocked(directory)) { |
---|
249 | log.warn("Lucene directory is locked"); |
---|
250 | IndexWriter.unlock(directory); |
---|
251 | } |
---|
252 | |
---|
253 | if(IndexWriter.isLocked(directory)) { |
---|
254 | log.error("Lucene directory still locked"); |
---|
255 | throw new IOException("Lucene directory locked"); |
---|
256 | } |
---|
257 | |
---|
258 | analyzer = new BrazilianAnalyzer(Version.LUCENE_35); |
---|
259 | |
---|
260 | writerConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer); |
---|
261 | writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); |
---|
262 | |
---|
263 | mergePolicy = new TieredMergePolicy(); |
---|
264 | mergePolicy.setSegmentsPerTier(4); |
---|
265 | mergePolicy.setMaxMergeAtOnce(4); |
---|
266 | mergePolicy.setNoCFSRatio(1); |
---|
267 | |
---|
268 | writerConfig.setMergePolicy(mergePolicy); |
---|
269 | |
---|
270 | writer = new IndexWriter(directory, writerConfig); |
---|
271 | |
---|
272 | closeTimer.schedule(createCloseTask(), MAX_INACTIVITY_INTERVAL); |
---|
273 | } |
---|
274 | } |
---|
275 | |
---|
276 | private TimerTask createCloseTask() { |
---|
277 | closeTask = new TimerTask() { |
---|
278 | @Override |
---|
279 | public void run() { |
---|
280 | synchronized(LuceneIndex.this) { |
---|
281 | long currentTime = System.currentTimeMillis(); |
---|
282 | long inactivityInterval = currentTime - lastRequestTime; |
---|
283 | if(inactivityInterval >= MAX_INACTIVITY_INTERVAL) { |
---|
284 | System.out.println("Closing idle Lucene index to save resources"); |
---|
285 | close(); |
---|
286 | } |
---|
287 | else { |
---|
288 | closeTimer.schedule(createCloseTask(), MAX_INACTIVITY_INTERVAL - inactivityInterval); |
---|
289 | } |
---|
290 | } |
---|
291 | } |
---|
292 | }; |
---|
293 | |
---|
294 | return closeTask; |
---|
295 | } |
---|
296 | |
---|
297 | public synchronized void close() { |
---|
298 | |
---|
299 | if(closeTask != null) { |
---|
300 | closeTask.cancel(); |
---|
301 | closeTask = null; |
---|
302 | } |
---|
303 | |
---|
304 | closeTimer.purge(); |
---|
305 | |
---|
306 | close(writer); |
---|
307 | writer = null; |
---|
308 | close(directory); |
---|
309 | directory = null; |
---|
310 | close(mergePolicy); |
---|
311 | mergePolicy = null; |
---|
312 | close(analyzer); |
---|
313 | analyzer = null; |
---|
314 | } |
---|
315 | |
---|
316 | private void close(Closeable resource) { |
---|
317 | if(resource != null) { |
---|
318 | try { |
---|
319 | resource.close(); |
---|
320 | } |
---|
321 | catch(Exception ex) { |
---|
322 | log.error(ex); |
---|
323 | } |
---|
324 | } |
---|
325 | } |
---|
326 | |
---|
327 | @Override |
---|
328 | protected void finalize() throws Throwable { |
---|
329 | try { |
---|
330 | close(); |
---|
331 | } |
---|
332 | finally { |
---|
333 | super.finalize(); |
---|
334 | } |
---|
335 | } |
---|
336 | } |
---|