[6785] | 1 | /** |
---|
| 2 | * MailArchiver is an application that provides services for storing and managing e-mail messages through a Web Services SOAP interface. |
---|
| 3 | * Copyright (C) 2012 Marcio Andre Scholl Levien and Fernando Alberto Reuter Wendt and Jose Ronaldo Nogueira Fonseca Junior |
---|
| 4 | * |
---|
| 5 | * This program is free software: you can redistribute it and/or modify |
---|
| 6 | * it under the terms of the GNU Affero General Public License as |
---|
| 7 | * published by the Free Software Foundation, either version 3 of the |
---|
| 8 | * License, or (at your option) any later version. |
---|
| 9 | * |
---|
| 10 | * This program is distributed in the hope that it will be useful, |
---|
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
| 13 | * GNU Affero General Public License for more details. |
---|
| 14 | * |
---|
| 15 | * You should have received a copy of the GNU Affero General Public License |
---|
| 16 | * along with this program. If not, see <http://www.gnu.org/licenses/>. |
---|
| 17 | */ |
---|
| 18 | |
---|
| 19 | /******************************************************************************\ |
---|
| 20 | * |
---|
| 21 | * This product was developed by |
---|
| 22 | * |
---|
| 23 | * SERVIÇO FEDERAL DE PROCESSAMENTO DE DADOS (SERPRO), |
---|
| 24 | * |
---|
| 25 | * a government company established under Brazilian law (5.615/70), |
---|
| 26 | * at Department of Development of Porto Alegre. |
---|
| 27 | * |
---|
| 28 | \******************************************************************************/ |
---|
| 29 | |
---|
| 30 | package serpro.mailarchiver.util; |
---|
| 31 | |
---|
| 32 | import java.io.Closeable; |
---|
| 33 | import java.io.IOException; |
---|
| 34 | import java.nio.file.Path; |
---|
| 35 | import java.util.ArrayList; |
---|
| 36 | import java.util.HashMap; |
---|
| 37 | import java.util.List; |
---|
| 38 | import java.util.Map; |
---|
| 39 | import java.util.Timer; |
---|
| 40 | import java.util.TimerTask; |
---|
| 41 | |
---|
| 42 | import org.apache.lucene.analysis.Analyzer; |
---|
| 43 | import org.apache.lucene.analysis.br.BrazilianAnalyzer; |
---|
| 44 | import org.apache.lucene.document.Document; |
---|
| 45 | import org.apache.lucene.index.IndexReader; |
---|
| 46 | import org.apache.lucene.index.IndexWriter; |
---|
| 47 | import org.apache.lucene.index.IndexWriterConfig; |
---|
| 48 | import org.apache.lucene.index.IndexWriterConfig.OpenMode; |
---|
| 49 | import org.apache.lucene.index.Term; |
---|
| 50 | import org.apache.lucene.index.TieredMergePolicy; |
---|
| 51 | import org.apache.lucene.queryParser.ParseException; |
---|
| 52 | import org.apache.lucene.queryParser.QueryParser; |
---|
| 53 | import org.apache.lucene.search.IndexSearcher; |
---|
| 54 | import org.apache.lucene.search.Query; |
---|
| 55 | import org.apache.lucene.search.ScoreDoc; |
---|
| 56 | import org.apache.lucene.search.Sort; |
---|
| 57 | import org.apache.lucene.search.SortField; |
---|
| 58 | import org.apache.lucene.search.TopDocs; |
---|
| 59 | import org.apache.lucene.store.Directory; |
---|
| 60 | import org.apache.lucene.store.FSDirectory; |
---|
| 61 | import org.apache.lucene.util.Version; |
---|
| 62 | |
---|
| 63 | import org.springframework.beans.factory.annotation.Autowired; |
---|
| 64 | import org.springframework.beans.factory.annotation.Configurable; |
---|
| 65 | |
---|
| 66 | @Configurable(preConstruction=true) |
---|
| 67 | public class LuceneIndex { |
---|
| 68 | |
---|
| 69 | private static final Logger log = Logger.getLocalLogger(); |
---|
| 70 | |
---|
| 71 | public enum Order { |
---|
| 72 | DateAsc ("date", SortField.STRING, false), |
---|
| 73 | DateDesc ("date", SortField.STRING, true), |
---|
| 74 | SubjectAsc ("subject", SortField.STRING, false), |
---|
| 75 | SubjectDesc ("subject", SortField.STRING, true), |
---|
| 76 | FromAsc ("from", SortField.STRING, false), |
---|
| 77 | FromDesc ("from", SortField.STRING, true), |
---|
| 78 | FromMboxAsc ("from_mbox", SortField.STRING, false), |
---|
| 79 | FromMboxDesc ("from_mbox", SortField.STRING, true), |
---|
| 80 | SenderAsc ("sender", SortField.STRING, false), |
---|
| 81 | SenderDesc ("sender", SortField.STRING, true), |
---|
| 82 | SenderMboxAsc ("sender_mbox", SortField.STRING, false), |
---|
| 83 | SenderMboxDesc ("sender_mbox", SortField.STRING, true), |
---|
| 84 | ToAsc ("to", SortField.STRING, false), |
---|
| 85 | ToDesc ("to", SortField.STRING, true), |
---|
| 86 | ToMboxAsc ("to_mbox", SortField.STRING, false), |
---|
| 87 | ToMboxDesc ("to_mbox", SortField.STRING, true), |
---|
| 88 | CcAsc ("cc", SortField.STRING, false), |
---|
| 89 | CcDesc ("cc", SortField.STRING, true), |
---|
| 90 | CcMboxAsc ("cc_mbox", SortField.STRING, false), |
---|
| 91 | CcMboxDesc ("cc_mbox", SortField.STRING, true), |
---|
| 92 | BccAsc ("bcc", SortField.STRING, false), |
---|
| 93 | BccDesc ("bcc", SortField.STRING, true), |
---|
| 94 | BccMboxAsc ("bcc_mbox", SortField.STRING, false), |
---|
| 95 | BccMboxDesc ("bcc_mbox", SortField.STRING, true); |
---|
| 96 | |
---|
| 97 | private final SortField sortField; |
---|
| 98 | |
---|
| 99 | Order(String field, int type, boolean reverse) { |
---|
| 100 | sortField = new SortField(field, type, reverse); |
---|
| 101 | } |
---|
| 102 | } |
---|
| 103 | |
---|
| 104 | private Directory directory; |
---|
| 105 | private Analyzer analyzer; |
---|
| 106 | private TieredMergePolicy mergePolicy; |
---|
| 107 | private IndexWriterConfig writerConfig; |
---|
| 108 | private IndexWriter writer; |
---|
| 109 | |
---|
| 110 | private static final long MAX_INACTIVITY_INTERVAL = 300000; |
---|
| 111 | private final Timer closeTimer; |
---|
| 112 | private TimerTask closeTask; |
---|
| 113 | private long lastRequestTime; |
---|
| 114 | |
---|
| 115 | public synchronized void addMessage(Document doc) throws IOException { |
---|
| 116 | |
---|
| 117 | insureOpenness(); |
---|
| 118 | |
---|
| 119 | try { |
---|
| 120 | writer.addDocument(doc); |
---|
| 121 | writer.commit(); |
---|
| 122 | } |
---|
| 123 | catch(OutOfMemoryError ex) { |
---|
| 124 | close(); |
---|
| 125 | log.error(ex); |
---|
| 126 | throw new IOException(ex); |
---|
| 127 | } |
---|
| 128 | } |
---|
| 129 | |
---|
| 130 | public synchronized void deleteMessage(String messageId) throws IOException { |
---|
| 131 | |
---|
| 132 | insureOpenness(); |
---|
| 133 | |
---|
| 134 | Term term = new Term("message_id", messageId); |
---|
| 135 | |
---|
| 136 | try { |
---|
| 137 | writer.deleteDocuments(term); |
---|
| 138 | writer.commit(); |
---|
| 139 | } |
---|
| 140 | catch(OutOfMemoryError ex) { |
---|
| 141 | close(); |
---|
| 142 | log.error(ex); |
---|
| 143 | throw new IOException(ex); |
---|
| 144 | } |
---|
| 145 | } |
---|
| 146 | |
---|
| 147 | public synchronized String[] search(String queryExpression, Order... sortOrder) throws IOException { |
---|
| 148 | |
---|
| 149 | insureOpenness(); |
---|
| 150 | |
---|
| 151 | IndexReader reader = null; |
---|
| 152 | IndexSearcher searcher = null; |
---|
| 153 | |
---|
| 154 | try { |
---|
| 155 | reader = IndexReader.open(writer, true); |
---|
| 156 | searcher = new IndexSearcher(reader); |
---|
| 157 | |
---|
| 158 | QueryParser parser = new QueryParser(Version.LUCENE_35, "body", analyzer); |
---|
| 159 | //ComplexPhraseQueryParser parser = new ComplexPhraseQueryParser(Version.LUCENE_35, "body", analyzer); |
---|
| 160 | parser.setAllowLeadingWildcard(true); |
---|
| 161 | |
---|
| 162 | try { |
---|
| 163 | Query query = parser.parse(queryExpression); |
---|
| 164 | |
---|
| 165 | List<SortField> sortFields = new ArrayList<SortField>(); |
---|
| 166 | for(Order order : sortOrder) { |
---|
| 167 | if( ! sortFields.contains(order.sortField)) { |
---|
| 168 | sortFields.add(order.sortField); |
---|
| 169 | } |
---|
| 170 | } |
---|
| 171 | |
---|
| 172 | TopDocs results; |
---|
| 173 | |
---|
| 174 | if(sortFields.size() > 0) { |
---|
| 175 | Sort sort = new Sort(); |
---|
| 176 | sort.setSort(sortFields.toArray(new SortField[sortFields.size()])); |
---|
| 177 | results = searcher.search(query, Integer.MAX_VALUE, sort); |
---|
| 178 | } |
---|
| 179 | else { |
---|
| 180 | results = searcher.search(query, Integer.MAX_VALUE); |
---|
| 181 | } |
---|
| 182 | |
---|
| 183 | ScoreDoc[] hits = results.scoreDocs; |
---|
| 184 | |
---|
| 185 | String[] oids = new String[hits.length]; |
---|
| 186 | |
---|
| 187 | for(int i = 0; i < hits.length; i++) { |
---|
| 188 | Document doc = searcher.doc(hits[i].doc); |
---|
| 189 | oids[i] = doc.get("message_id"); |
---|
| 190 | } |
---|
| 191 | |
---|
| 192 | return oids; |
---|
| 193 | } |
---|
| 194 | catch(ParseException ex) { |
---|
| 195 | log.error(ex); |
---|
| 196 | return new String[]{}; |
---|
| 197 | } |
---|
| 198 | } |
---|
| 199 | finally { |
---|
| 200 | close(searcher); |
---|
| 201 | close(reader); |
---|
| 202 | } |
---|
| 203 | } |
---|
| 204 | |
---|
| 205 | //-------------------------------------------------------------------------- |
---|
| 206 | @Autowired |
---|
| 207 | private UserAppConfig userAppConfig; |
---|
| 208 | |
---|
| 209 | private final Path absolutePath; |
---|
| 210 | |
---|
| 211 | public final Path getAbsolutePath() { |
---|
| 212 | return absolutePath; |
---|
| 213 | } |
---|
| 214 | |
---|
| 215 | private static final Map<String, LuceneIndex> instances = new HashMap<String, LuceneIndex>(); |
---|
| 216 | |
---|
| 217 | public static LuceneIndex getInstance(String userId) { |
---|
| 218 | synchronized (instances) { |
---|
| 219 | LuceneIndex instance = instances.get(userId); |
---|
| 220 | if (instance == null) { |
---|
| 221 | instance = new LuceneIndex(userId); |
---|
| 222 | instances.put(userId, instance); |
---|
| 223 | } |
---|
| 224 | return instance; |
---|
| 225 | } |
---|
| 226 | } |
---|
| 227 | |
---|
| 228 | protected LuceneIndex(String userId) { |
---|
| 229 | absolutePath = userAppConfig.SERVER.getArchiveDir() |
---|
| 230 | .resolve("index") |
---|
| 231 | .resolve(userId); |
---|
| 232 | |
---|
| 233 | closeTimer = new Timer("lucene-close-timer-" + userId, true); |
---|
| 234 | } |
---|
| 235 | |
---|
| 236 | private synchronized void insureOpenness() throws IOException { |
---|
| 237 | |
---|
| 238 | lastRequestTime = System.currentTimeMillis(); |
---|
| 239 | |
---|
| 240 | if(writer == null) { |
---|
| 241 | |
---|
| 242 | close(); |
---|
| 243 | |
---|
| 244 | System.out.println("Opening Lucene index"); |
---|
| 245 | |
---|
| 246 | directory = FSDirectory.open(absolutePath.toFile()); |
---|
| 247 | |
---|
| 248 | if(IndexWriter.isLocked(directory)) { |
---|
| 249 | log.warn("Lucene directory is locked"); |
---|
| 250 | IndexWriter.unlock(directory); |
---|
| 251 | } |
---|
| 252 | |
---|
| 253 | if(IndexWriter.isLocked(directory)) { |
---|
| 254 | log.error("Lucene directory still locked"); |
---|
| 255 | throw new IOException("Lucene directory locked"); |
---|
| 256 | } |
---|
| 257 | |
---|
| 258 | analyzer = new BrazilianAnalyzer(Version.LUCENE_35); |
---|
| 259 | |
---|
| 260 | writerConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer); |
---|
| 261 | writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); |
---|
| 262 | |
---|
| 263 | mergePolicy = new TieredMergePolicy(); |
---|
| 264 | mergePolicy.setSegmentsPerTier(4); |
---|
| 265 | mergePolicy.setMaxMergeAtOnce(4); |
---|
| 266 | mergePolicy.setNoCFSRatio(1); |
---|
| 267 | |
---|
| 268 | writerConfig.setMergePolicy(mergePolicy); |
---|
| 269 | |
---|
| 270 | writer = new IndexWriter(directory, writerConfig); |
---|
| 271 | |
---|
| 272 | closeTimer.schedule(createCloseTask(), MAX_INACTIVITY_INTERVAL); |
---|
| 273 | } |
---|
| 274 | } |
---|
| 275 | |
---|
| 276 | private TimerTask createCloseTask() { |
---|
| 277 | closeTask = new TimerTask() { |
---|
| 278 | @Override |
---|
| 279 | public void run() { |
---|
| 280 | synchronized(LuceneIndex.this) { |
---|
| 281 | long currentTime = System.currentTimeMillis(); |
---|
| 282 | long inactivityInterval = currentTime - lastRequestTime; |
---|
| 283 | if(inactivityInterval >= MAX_INACTIVITY_INTERVAL) { |
---|
| 284 | System.out.println("Closing idle Lucene index to save resources"); |
---|
| 285 | close(); |
---|
| 286 | } |
---|
| 287 | else { |
---|
| 288 | closeTimer.schedule(createCloseTask(), MAX_INACTIVITY_INTERVAL - inactivityInterval); |
---|
| 289 | } |
---|
| 290 | } |
---|
| 291 | } |
---|
| 292 | }; |
---|
| 293 | |
---|
| 294 | return closeTask; |
---|
| 295 | } |
---|
| 296 | |
---|
| 297 | public synchronized void close() { |
---|
| 298 | |
---|
| 299 | if(closeTask != null) { |
---|
| 300 | closeTask.cancel(); |
---|
| 301 | closeTask = null; |
---|
| 302 | } |
---|
| 303 | |
---|
| 304 | closeTimer.purge(); |
---|
| 305 | |
---|
| 306 | close(writer); |
---|
| 307 | writer = null; |
---|
| 308 | close(directory); |
---|
| 309 | directory = null; |
---|
| 310 | close(mergePolicy); |
---|
| 311 | mergePolicy = null; |
---|
| 312 | close(analyzer); |
---|
| 313 | analyzer = null; |
---|
| 314 | } |
---|
| 315 | |
---|
| 316 | private void close(Closeable resource) { |
---|
| 317 | if(resource != null) { |
---|
| 318 | try { |
---|
| 319 | resource.close(); |
---|
| 320 | } |
---|
| 321 | catch(Exception ex) { |
---|
| 322 | log.error(ex); |
---|
| 323 | } |
---|
| 324 | } |
---|
| 325 | } |
---|
| 326 | |
---|
| 327 | @Override |
---|
| 328 | protected void finalize() throws Throwable { |
---|
| 329 | try { |
---|
| 330 | close(); |
---|
| 331 | } |
---|
| 332 | finally { |
---|
| 333 | super.finalize(); |
---|
| 334 | } |
---|
| 335 | } |
---|
| 336 | } |
---|