/**
 * MailArchiver is an application that provides services for storing and managing e-mail messages through a Web Services SOAP interface.
 * Copyright (C) 2012 Marcio Andre Scholl Levien and Fernando Alberto Reuter Wendt and Jose Ronaldo Nogueira Fonseca Junior
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/******************************************************************************\
 *
 * This product was developed by
 *
 * SERVIÇO FEDERAL DE PROCESSAMENTO DE DADOS (SERPRO),
 *
 * a government company established under Brazilian law (5.615/70),
 * at Department of Development of Porto Alegre.
* \******************************************************************************/ package serpro.mailarchiver.service.web; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.nio.file.Path; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.Arrays; import java.util.Date; import java.util.Map.Entry; import java.util.Stack; import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.jdo.annotations.PersistenceAware; import org.apache.james.mime4j.codec.DecodeMonitor; import org.apache.james.mime4j.codec.DecoderUtil; import org.apache.james.mime4j.io.LineNumberInputStream; import org.apache.james.mime4j.message.SimpleContentHandler; import org.apache.james.mime4j.parser.MimeStreamParser; import org.apache.james.mime4j.stream.BodyDescriptor; import org.apache.james.mime4j.stream.MimeConfig; import org.apache.james.mime4j.util.CharsetUtil; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.DateTools.Resolution; import org.apache.lucene.document.Document; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.springframework.beans.factory.annotation.Autowired; import com.google.common.base.Supplier; import com.google.common.io.Files; import net.htmlparser.jericho.Element; import net.htmlparser.jericho.Source; import net.htmlparser.jericho.TextExtractor; import serpro.mailarchiver.domain.metaarchive.*; import serpro.mailarchiver.service.BaseService; import serpro.mailarchiver.service.dto.TMessage; import serpro.mailarchiver.service.find.FFolder; import serpro.mailarchiver.service.find.FMessage; import serpro.mailarchiver.session.Session; import serpro.mailarchiver.util.Charsets; import serpro.mailarchiver.util.Logger; import serpro.mailarchiver.util.jdo.PersistenceManager; import 
serpro.mailarchiver.util.transaction.WithReadWriteTx; @PersistenceAware public class DefaultArchiveOperation extends BaseService implements ArchiveOperation { private static class ExpungedException extends RuntimeException {} private static class RepeatedException extends RuntimeException {} private static final Logger log = Logger.getLocalLogger(); private static final String CR = "\015"; private static final String LF = "\012"; private static final String CRLF = "\015\012"; @Autowired private FFolder findFolder; @Autowired private FMessage findMessage; @WithReadWriteTx @Override public TMessage apply(String folderId, String message) throws ServiceFault { PersistenceManager pm = getPersistenceManager(); Folder folder = findFolder.byId(folderId); if(folder == null) { ServiceFault.folderNotFound() .setActor("archive") .setMessage("Destination folder not found.") .addValue("folderId", folderId) .raise(); } MimeConfig config = new MimeConfig(); config.setStrictParsing(false); config.setMaxLineLen(-1); config.setMaxContentLen(-1); config.setMaxHeaderLen(-1); config.setMaxHeaderCount(-1); Message rootMessage = null; MessageHandler handler = null; try { handler = new MessageHandler(folder); MimeStreamParser parser = new MimeStreamParser(config); parser.setContentHandler(handler); parser.setContentDecoding(true); byte[] ba = message.getBytes(Charsets.Windows_1252); switch(ba[ba.length - 1]) { case 0x0A: //LF break; case 0x0D: //CR ba = Arrays.copyOf(ba, ba.length + 1); ba[ba.length - 1] = 0x0A; break; default: ba = Arrays.copyOf(ba, ba.length + 2); ba[ba.length - 2] = 0x0D; ba[ba.length - 1] = 0x0A; break; } parser.parse(new ByteArrayInputStream(ba)); //debug //MimeTokenStream mimeTokenStream = parser.getMimeTokenStream(); //LineNumberInputStream.Entity lineNumberRootEntity = mimeTokenStream.getLineNumberRootEntity(); //String dump = lineNumberRootEntity.dump("root entity"); //System.out.println(dump); rootMessage = handler.getRootMessage(); rootMessage.sync(); 
rootMessage.setFolder(folder); rootMessage.setUnseen(true); rootMessage.setUnanswered(true); UnstructuredField importanceField = (UnstructuredField)rootMessage.getField("importance"); if(importanceField != null) { String importance = importanceField.getText().toLowerCase().trim(); if("high".equals(importance)) { rootMessage.setImportanceHigh(true); } else if("low".equals(importance)) { rootMessage.setImportanceLow(true); } else { rootMessage.setImportanceNormal(true); } } pm.makePersistent(rootMessage); Path msgFile = folder.getAbsolutePath().resolve(rootMessage.getOid() + ".eml"); Files.write(ba, msgFile.toFile()); Document luceneDoc = handler.getLuceneDoc(); Session.getLuceneIndex().addMessage(luceneDoc); final Message rootMsg = rootMessage; log.info("Mensagem arquivada com sucesso:%n%n%1$s", new Supplier() { @Override public Object[] get() { String msgDOM = rootMsg.dumpTree(); //debug //System.out.println(msgDOM); return new Object[] { msgDOM }; } }); } catch(ExpungedException e) { ServiceFault.expungedMessage() .setActor("archive") .raise(); } catch(RepeatedException e) { rootMessage = handler.getRootMessage(); } catch(Throwable t) { ServiceFault.archiveFailure() .setActor("archive") .setMessage("Archive failure.") .setCause(t) .raise(); } return new TMessage(rootMessage); } private class MessageHandler extends SimpleContentHandler { private static final int MSG_FLAG_EXPUNGED = 0x0008; private Stack entityStack; private Stack multipartStack; private final Folder folder; private Message rootMessage; private LineNumberInputStream.Entity rootLnisEntity; private Document luceneDoc; private boolean parsingRoot; Message getRootMessage() { return rootMessage; } Document getLuceneDoc() { return luceneDoc; } public MessageHandler(Folder folder) { this.folder = folder; entityStack = new Stack(); multipartStack = new Stack(); } //message_id private void addStoredNotAnalyzedIndexField(String name, String value) { org.apache.lucene.document.Field field = new 
org.apache.lucene.document.Field( name, value, org.apache.lucene.document.Field.Store.YES, org.apache.lucene.document.Field.Index.NOT_ANALYZED); field.setIndexOptions(IndexOptions.DOCS_ONLY); luceneDoc.add(field); } //date, from_mbox, sender_mbox, to_mbox, cc_mbox, bcc_mbox private void addNotAnalyzedIndexField(String name, String value) { org.apache.lucene.document.Field field = new org.apache.lucene.document.Field( name, value, org.apache.lucene.document.Field.Store.NO, org.apache.lucene.document.Field.Index.NOT_ANALYZED); field.setIndexOptions(IndexOptions.DOCS_ONLY); luceneDoc.add(field); } //subject, from, sender, to, cc, bcc, body private void addAnalyzedIndexField(String name, String value) { org.apache.lucene.document.Field field = new org.apache.lucene.document.Field( name, value, org.apache.lucene.document.Field.Store.NO, org.apache.lucene.document.Field.Index.ANALYZED); field.setIndexOptions(IndexOptions.DOCS_ONLY); luceneDoc.add(field); } private String decodeEncodedWords(String text) { String decoded = DecoderUtil.decodeEncodedWords(text, new DecodeMonitor() { @Override public boolean warn(String error, String dropDesc) { log.warn("error: %s\ndropDesc: %s", error, dropDesc); return false; } }); //debug //if( ! 
decoded.equals(text)) { // System.out.println(String.format("original: %s\ndecoded: %s", text, decoded)); //} return decoded; } @Override public void startMessage(LineNumberInputStream.Entity lnisEntity) { if(entityStack.empty()) { //o empilhamento da mensagem raiz é postergado até //a leitura do primeiro header para verificar expungimento rootLnisEntity = lnisEntity; return; } final MessageBody messageBody = new MessageBody(); messageBody.setEntity(entityStack.peek()); final EmbeddedMessage embeddedMessage = new EmbeddedMessage(lnisEntity); embeddedMessage.setComposite(messageBody); entityStack.push(embeddedMessage); log.debug("Mime Parse Event: START EMBEDDED MESSAGE\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = embeddedMessage.dumpPath(); //System.out.println("START EMBEDDED MESSAGE"); //System.out.println(domPath); return new Object[] { domPath }; } }); } @Override public void startMultipart(BodyDescriptor bd) { final Multipart multipart = new Multipart(); multipart.setEntity(entityStack.peek()); multipartStack.push(multipart); log.debug("Mime Parse Event: START MULTIPART\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = multipart.dumpPath(); //System.out.println("START MULTIPART"); //System.out.println(domPath); return new Object[] { domPath }; } }); } @Override public void preamble(InputStream is) { Multipart multipart = multipartStack.peek(); multipart.setPreambleStream(is); } @Override public void startBodyPart(LineNumberInputStream.Entity lnisEntity) { final BodyPart bodyPart = new BodyPart(lnisEntity); bodyPart.setComposite(multipartStack.peek()); entityStack.push(bodyPart); log.debug("Mime Parse Event: START BODY PART\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = bodyPart.dumpPath(); //System.out.println("START BODY PART"); //System.out.println(domPath); return new Object[] { domPath }; } }); } @Override public void headers(org.apache.james.mime4j.dom.Header hd) { 
if(entityStack.empty()) { parsingRoot = true; //o empilhamento da mensagem raiz é postergado até a leitura do primeiro header para //verificar se a mensagem está marcada como expungida pelo Thunderbird, isto é, //se a mesma foi apagada ou movida de uma pasta que não foi compactada após este evento org.apache.james.mime4j.dom.field.UnstructuredField field = (org.apache.james.mime4j.dom.field.UnstructuredField) hd.getField("X-Mozilla-Status"); if(field != null) { if((Integer.parseInt(field.getValue(), 16) & MSG_FLAG_EXPUNGED) > 0) { log.debug(">>EXPUNGED<<%n%n%s", hd.toString()); throw new ExpungedException(); } } try { MessageDigest md = MessageDigest.getInstance("MD5"); md.update(folder.getOid().getBytes("UTF-8")); md.update(hd.toString().getBytes("UTF-8")); char[] hexDigest = new char[32]; int i = 0; for(byte b : md.digest()) { hexDigest[i++] = Character.forDigit((char)((b & 0xf0) >>> 4), 16); hexDigest[i++] = Character.forDigit((char)(b & 0x0f), 16); } String digest = new String(hexDigest); String guid = digest.substring(0, 8) + "-" + digest.substring(8, 12) + "-" + digest.substring(12, 16) + "-" + digest.substring(16, 20) + "-" + digest.substring(20, 32); rootMessage = findMessage.byId(guid); if(rootMessage == null) { rootMessage = new Message(rootLnisEntity); rootMessage.setOid(guid); luceneDoc = new Document(); } else if(rootMessage.getFolder() == folder) { log.warn(">>REPEATED<<%n%nFolder:%s%n%n%s", folder.getRelativePath(), hd.toString()); throw new RepeatedException(); } else { rootMessage = new Message(rootLnisEntity); rootMessage.setOid(UUID.randomUUID().toString()); luceneDoc = new Document(); } } catch(NoSuchAlgorithmException ex) { } catch(UnsupportedEncodingException ex) { } entityStack.push(rootMessage); addStoredNotAnalyzedIndexField("message_id", rootMessage.getOid()); log.debug("Mime Parse Event: START MESSAGE\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = rootMessage.dumpPath(); //System.out.println("START MESSAGE"); 
//System.out.println(domPath); return new Object[] { domPath }; } }); } else { parsingRoot = false; } for(org.apache.james.mime4j.stream.Field f : hd) { final String fldName = f.getName(); // if(f instanceof org.apache.james.mime4j.dom.field.UnstructuredField) { //********************************************************** // // UNSTRUCTURED FIELD // // "Subject", "Message-Id", "Resent-Msg-Id", // "Comments", "Keywords", etc // //********************************************************** org.apache.james.mime4j.dom.field.UnstructuredField uf = (org.apache.james.mime4j.dom.field.UnstructuredField) f; final UnstructuredField field = new UnstructuredField(); field.setName(fldName); field.setValid(uf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = uf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } String value = uf.getValue(); if(value != null) { if(("Message-ID".equalsIgnoreCase(fldName)) || ("Resent-Message-ID".equalsIgnoreCase(fldName))) { Matcher m = Pattern.compile("<([^>]+)>").matcher(value); if(m.find()) { field.setText(m.group(1)); } else { log.error("%s field does not match msg-id format: %s", fldName, value); field.setText(value); } } else { value = decodeEncodedWords(value); field.setText(value); } } field.setEntity(entityStack.peek()); if(parsingRoot && (value != null)) { if("subject".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("subject", value); } } log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("UNSTRUCTURED FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.MimeVersionField) { //********************************************************** // // MIME VERSION FIELD // //********************************************************** 
org.apache.james.mime4j.dom.field.MimeVersionField mvf = (org.apache.james.mime4j.dom.field.MimeVersionField) f; final UnstructuredField field = new UnstructuredField(); field.setName(fldName); field.setValid(mvf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = mvf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } String mimeVersion = mvf.getMajorVersion() + "." + mvf.getMinorVersion(); field.setText(mimeVersion); field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("UNSTRUCTURED FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.ContentMD5Field) { //********************************************************** // // CONTENT MD5 FIELD // //********************************************************** org.apache.james.mime4j.dom.field.ContentMD5Field cmd5f = (org.apache.james.mime4j.dom.field.ContentMD5Field) f; final UnstructuredField field = new UnstructuredField(); field.setName(fldName); field.setValid(cmd5f.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = cmd5f.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } String md5raw = cmd5f.getMD5Raw(); if(md5raw != null) { field.setText(md5raw); } field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("UNSTRUCTURED FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.ContentLocationField) { //********************************************************** // // CONTENT LOCATION FIELD // 
//********************************************************** org.apache.james.mime4j.dom.field.ContentLocationField clf = (org.apache.james.mime4j.dom.field.ContentLocationField) f; final UnstructuredField field = new UnstructuredField(); field.setName(fldName); field.setValid(clf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = clf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } String location = clf.getLocation(); if(location != null) { field.setText(location); } field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("UNSTRUCTURED FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.ContentLengthField) { //********************************************************** // // CONTENT LENGTH FIELD // //********************************************************** org.apache.james.mime4j.dom.field.ContentLengthField clf = (org.apache.james.mime4j.dom.field.ContentLengthField) f; final UnstructuredField field = new UnstructuredField(); field.setName(fldName); field.setValid(clf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = clf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } String length = Long.toString(clf.getContentLength()); field.setText(length); field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("UNSTRUCTURED FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.ContentLanguageField) { //********************************************************** // // 
CONTENT LANGUAGE FIELD // //********************************************************** org.apache.james.mime4j.dom.field.ContentLanguageField clf = (org.apache.james.mime4j.dom.field.ContentLanguageField) f; final UnstructuredField field = new UnstructuredField(); field.setName(fldName); field.setValid(clf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = clf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } String languages = null; for(String language : clf.getLanguages()) { if(languages == null) { languages = language; } else { languages += "|" + language; } } if(languages != null) { field.setText(languages); } field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("UNSTRUCTURED FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.ContentIdField) { //********************************************************** // // CONTENT ID FIELD // //********************************************************** org.apache.james.mime4j.dom.field.ContentIdField cidf = (org.apache.james.mime4j.dom.field.ContentIdField) f; final UnstructuredField field = new UnstructuredField(); field.setName(fldName); field.setValid(cidf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = cidf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } String cid = cidf.getId(); if(cid != null) { Matcher m = Pattern.compile("<([^>]+)>").matcher(cid); if(m.find()) { field.setText(m.group(1)); } else { log.error("%s field does not match content-id format: %s", fldName, cid); field.setText(cid); } } field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() { @Override public Object[] get() { 
String domPath = field.dumpPath(); //System.out.println("UNSTRUCTURED FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.ContentDescriptionField) { //********************************************************** // // CONTENT DESCRIPTION FIELD // //********************************************************** org.apache.james.mime4j.dom.field.ContentDescriptionField cdf = (org.apache.james.mime4j.dom.field.ContentDescriptionField) f; final UnstructuredField field = new UnstructuredField(); field.setName(fldName); field.setValid(cdf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = cdf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } String description = cdf.getDescription(); if(description != null) { description = decodeEncodedWords(description); field.setText(description); } field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("UNSTRUCTURED FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.ContentTransferEncodingField) { //********************************************************** // // CONTENT TRANSFER ENCODING FIELD // //********************************************************** org.apache.james.mime4j.dom.field.ContentTransferEncodingField ctef = (org.apache.james.mime4j.dom.field.ContentTransferEncodingField) f; final UnstructuredField field = new UnstructuredField(); field.setName(fldName); field.setValid(ctef.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = ctef.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } String encoding = ctef.getEncoding(); if(encoding != null) { field.setText(encoding); } 
field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: UNSTRUCTURED FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("UNSTRUCTURED FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.ContentTypeField) { //********************************************************** // // CONTENT TYPE FIELD // //********************************************************** org.apache.james.mime4j.dom.field.ContentTypeField ctf = (org.apache.james.mime4j.dom.field.ContentTypeField) f; final ContentTypeField field = new ContentTypeField(); field.setName(fldName); field.setValid(ctf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = ctf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } String mimeType = ctf.getMimeType(); if(mimeType != null) { String[] mimeTypeSplit = mimeType.split("/"); field.setMediaType(mimeTypeSplit[0]); field.setSubType(mimeTypeSplit[1]); } for(Entry entry : ctf.getParameters().entrySet()) { String key = entry.getKey(); String value = entry.getValue(); if((key != null) && (!key.isEmpty()) && (value != null) && (!value.isEmpty())) { value = decodeEncodedWords(value); field.addParameter(key, value); } } field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: CONTENT TYPE FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("CONTENT TYPE FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.ContentDispositionField) { //********************************************************** // // CONTENT DISPOSITION FIELD // //********************************************************** org.apache.james.mime4j.dom.field.ContentDispositionField cdf = 
(org.apache.james.mime4j.dom.field.ContentDispositionField) f; final ContentDispositionField field = new ContentDispositionField(); field.setName(fldName); field.setValid(cdf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = cdf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } String dispositionType = cdf.getDispositionType(); if(dispositionType != null) { field.setDispositionType(dispositionType); } for(Entry entry : cdf.getParameters().entrySet()) { String key = entry.getKey(); String value = entry.getValue(); if((key != null) && (!key.isEmpty()) && (value != null) && (!value.isEmpty())) { value = decodeEncodedWords(value); field.addParameter(key, value); } } field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: CONTENT DISPOSITION FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("CONTENT DISPOSITION FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.DateTimeField) { //********************************************************** // // DATE TIME FIELD // // "Date", "Resent-Date" // //********************************************************** org.apache.james.mime4j.dom.field.DateTimeField dtf = (org.apache.james.mime4j.dom.field.DateTimeField) f; final DateTimeField field = new DateTimeField(); field.setName(fldName); field.setValid(dtf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = dtf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } Date date = dtf.getDate(); if(date != null) { field.setDate(date); } field.setEntity(entityStack.peek()); if(parsingRoot && (date != null)) { if("date".equalsIgnoreCase(fldName)) { addNotAnalyzedIndexField("date", DateTools.dateToString(date, Resolution.SECOND)); } } log.debug("Mime Parse Event: DATE TIME 
FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("DATE TIME FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); } // // else if(f instanceof org.apache.james.mime4j.dom.field.AddressListField) { //********************************************************** // // ADDRESS LIST FIELD // // "To", "Cc", "Bcc", "Reply-To", // "Resent-To", "Resent-Cc", "Resent-Bcc" // //********************************************************** org.apache.james.mime4j.dom.field.AddressListField alf = (org.apache.james.mime4j.dom.field.AddressListField) f; final AddressListField field = new AddressListField(); field.setName(fldName); field.setValid(alf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = alf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: ADDRESS LIST FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("ADDRESS LIST FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); for(org.apache.james.mime4j.dom.address.Address a : alf.getAddressList()) { if(a instanceof org.apache.james.mime4j.dom.address.Mailbox) { //************************************************** // // address list field MAILBOX // //************************************************** org.apache.james.mime4j.dom.address.Mailbox m = (org.apache.james.mime4j.dom.address.Mailbox) a; final AddressListField_Mailbox mailbox = new AddressListField_Mailbox(); String mname = m.getName(); if(mname != null) { mname = decodeEncodedWords(mname); mailbox.setName(mname); } String localPart = m.getLocalPart(); if(localPart != null) { mailbox.setLocalPart(localPart); } String domain = m.getDomain(); if(domain != null) { mailbox.setDomain(domain); } org.apache.james.mime4j.dom.address.DomainList dl = 
m.getRoute(); if(dl.size() > 0) { mailbox.setRoute(dl.toRouteString()); } mailbox.setField(field); if(parsingRoot && (mname != null)) { if("to".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("to", mname); } else if("cc".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("cc", mname); } else if("bcc".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("bcc", mname); } } if(parsingRoot && (localPart != null) && (domain != null)) { String mbox = localPart + "@" + domain; if("to".equalsIgnoreCase(fldName)) { addNotAnalyzedIndexField("to_mbox", mbox); } else if("cc".equalsIgnoreCase(fldName)) { addNotAnalyzedIndexField("cc_mbox", mbox); } else if("bcc".equalsIgnoreCase(fldName)) { addNotAnalyzedIndexField("bcc_mbox", mbox); } } log.debug("Mime Parse Event: ADDRESS LIST FIELD MAILBOX\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = mailbox.dumpPath(); //System.out.println("ADDRESS LIST FIELD MAILBOX"); //System.out.println(domPath); return new Object[] { domPath }; } }); } else if(a instanceof org.apache.james.mime4j.dom.address.Group) { //************************************************** // // address list field GROUP // //************************************************** org.apache.james.mime4j.dom.address.Group g = (org.apache.james.mime4j.dom.address.Group) a; final AddressListField_Group group = new AddressListField_Group(); String gname = g.getName(); if(gname != null) { gname = decodeEncodedWords(gname); group.setName(gname); } group.setField(field); if(parsingRoot && (gname != null)) { if("to".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("to", gname); } else if("cc".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("cc", gname); } else if("bcc".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("bcc", gname); } } log.debug("Mime Parse Event: GROUP\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = group.dumpPath(); //System.out.println("GROUP"); //System.out.println(domPath); return new Object[] { 
domPath }; } }); for(org.apache.james.mime4j.dom.address.Mailbox m : g.getMailboxes()) { //********************************************** // // address list field GROUP MAILBOX // //********************************************** final AddressListField_Group_Mailbox mailbox = new AddressListField_Group_Mailbox(); String mname = m.getName(); if(mname != null) { mname = decodeEncodedWords(mname); mailbox.setName(mname); } String localPart = m.getLocalPart(); if(localPart != null) { mailbox.setLocalPart(localPart); } String domain = m.getDomain(); if(domain != null) { mailbox.setDomain(domain); } org.apache.james.mime4j.dom.address.DomainList dl = m.getRoute(); if(dl.size() > 0) { mailbox.setRoute(dl.toRouteString()); } mailbox.setGroup(group); if(parsingRoot && (mname != null)) { if("to".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("to", mname); } else if("cc".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("cc", mname); } else if("bcc".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("bcc", mname); } } if(parsingRoot && (localPart != null) && (domain != null)) { String mbox = localPart + "@" + domain; if("to".equalsIgnoreCase(fldName)) { addNotAnalyzedIndexField("to_mbox", mbox); } else if("cc".equalsIgnoreCase(fldName)) { addNotAnalyzedIndexField("cc_mbox", mbox); } else if("bcc".equalsIgnoreCase(fldName)) { addNotAnalyzedIndexField("bcc_mbox", mbox); } } log.debug("Mime Parse Event: GROUP MAILBOX\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = mailbox.dumpPath(); //System.out.println("GROUP MAILBOX"); //System.out.println(domPath); return new Object[] { domPath }; } }); } } } } // // else if(f instanceof org.apache.james.mime4j.dom.field.MailboxListField) { //********************************************************** // // MAILBOX LIST FIELD // // "From", "Resent-From" // //********************************************************** org.apache.james.mime4j.dom.field.MailboxListField mlf = 
(org.apache.james.mime4j.dom.field.MailboxListField) f; final MailboxListField field = new MailboxListField(); field.setName(fldName); field.setValid(mlf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = mlf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: MAILBOX LIST FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("MAILBOX LIST FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); for(org.apache.james.mime4j.dom.address.Mailbox m : mlf.getMailboxList()) { //****************************************************** // // mailbox list field MAILBOX // //****************************************************** final MailboxListField_Mailbox mailbox = new MailboxListField_Mailbox(); String mname = m.getName(); if(mname != null) { mname = decodeEncodedWords(mname); mailbox.setName(mname); } String localPart = m.getLocalPart(); if(localPart != null) { mailbox.setLocalPart(localPart); } String domain = m.getDomain(); if(domain != null) { mailbox.setDomain(domain); } org.apache.james.mime4j.dom.address.DomainList dl = m.getRoute(); if(dl.size() > 0) { mailbox.setRoute(dl.toRouteString()); } mailbox.setField(field); if(parsingRoot && (mname != null)) { if("from".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("from", mname); } } if(parsingRoot && (localPart != null) && (domain != null)) { String mbox = localPart + "@" + domain; if("from".equalsIgnoreCase(fldName)) { addNotAnalyzedIndexField("from_mbox", mbox); } } log.debug("Mime Parse Event: MAILBOX LIST FIELD MAILBOX\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = mailbox.dumpPath(); //System.out.println("MAILBOX LIST FIELD MAILBOX"); //System.out.println(domPath); return new Object[] { domPath }; } }); } } // // else if(f instanceof 
org.apache.james.mime4j.dom.field.MailboxField) { //********************************************************** // // MAILBOX FIELD // // "Sender", "Resent-Sender" // //********************************************************** org.apache.james.mime4j.dom.field.MailboxField mf = (org.apache.james.mime4j.dom.field.MailboxField) f; final MailboxField field = new MailboxField(); field.setName(fldName); field.setValid(mf.isValidField()); if(!field.isValid()) { org.apache.james.mime4j.dom.field.ParseException e = mf.getParseException(); if(e != null) { field.addParseExceptionStackTrace(e); } } field.setEntity(entityStack.peek()); log.debug("Mime Parse Event: MAILBOX FIELD\n\n%s", new Supplier() { @Override public Object[] get() { String domPath = field.dumpPath(); //System.out.println("MAILBOX FIELD"); //System.out.println(domPath); return new Object[] { domPath }; } }); //********************************************************** // // mailbox field MAILBOX // //********************************************************** org.apache.james.mime4j.dom.address.Mailbox m = mf.getMailbox(); final MailboxField_Mailbox mailbox = new MailboxField_Mailbox(); String mname = m.getName(); if(mname != null) { mname = decodeEncodedWords(mname); mailbox.setName(mname); } String localPart = m.getLocalPart(); if(localPart != null) { mailbox.setLocalPart(localPart); } String domain = m.getDomain(); if(domain != null) { mailbox.setDomain(domain); } org.apache.james.mime4j.dom.address.DomainList dl = m.getRoute(); if(dl.size() > 0) { mailbox.setRoute(dl.toRouteString()); } mailbox.setField(field); if(parsingRoot && (mname != null)) { if("sender".equalsIgnoreCase(fldName)) { addAnalyzedIndexField("sender", mname); } } if(parsingRoot && (localPart != null) && (domain != null)) { String mbox = localPart + "@" + domain; if("sender".equalsIgnoreCase(fldName)) { addNotAnalyzedIndexField("sender_mbox", mbox); } } log.debug("Mime Parse Event: MAILBOX FIELD MAILBOX\n\n%s", new Supplier() { @Override 
public Object[] get() {
                            String domPath = mailbox.dumpPath();
                            //System.out.println("MAILBOX FIELD MAILBOX");
                            //System.out.println(domPath);
                            return new Object[] { domPath };
                        }
                    });
                }
                //
            }
        }

        /**
         * Handles a single (non-multipart) body: creates the matching
         * {@code TextBody}/{@code BinaryBody} object, measures the body and, for
         * text bodies, derives a preview and feeds the text to the index.
         *
         * <p>The stream is always read to its end so that the recorded size covers
         * the whole body, but the bytes are only kept in memory when the media type
         * is {@code text}. An {@link IOException} raised while reading is logged
         * and processing continues with whatever was read before the failure.
         *
         * <p>For text bodies the bytes are decoded with the charset named by the
         * descriptor, falling back to Windows-1252 when the lookup yields nothing.
         * HTML bodies are reduced to plain text with a {@code TextExtractor} over
         * the {@code body} element (or the whole document when there is none);
         * any other text subtype has runs of the CRLF/CR/LF patterns replaced by a
         * single space. The first 300 chars (at most) become the preview.
         *
         * @param bd descriptor supplying media type, subtype and charset of the body
         * @param is stream over the body content
         */
        @Override
        public void body(BodyDescriptor bd, InputStream is) {

            final boolean isText = bd.getMediaType().equalsIgnoreCase("text");

            final SingleBody singleBody = isText ? new TextBody() : new BinaryBody();
            singleBody.setOid(UUID.randomUUID().toString());
            singleBody.setEntity(entityStack.peek());

            int size = 0;
            int capacity = 0x10000;
            // Only text content is retained; binary content is merely counted.
            byte[] textBuf = isText ? new byte[capacity] : null;

            try {
                byte[] buf = new byte[0x1000];
                int n;
                while((n = is.read(buf)) >= 0) {
                    if(isText) {
                        // A read delivers at most 0x1000 bytes, so a single
                        // 0x10000 growth step always makes enough room.
                        if((size + n) >= capacity) {
                            capacity += 0x10000;
                            textBuf = Arrays.copyOf(textBuf, capacity);
                        }
                        System.arraycopy(buf, 0, textBuf, size, n);
                    }
                    size += n;
                }
            }
            catch(IOException ex) {
                // Best effort: keep going with the bytes read so far.
                log.error(ex);
            }

            singleBody.setSize(size);
            singleBody.getEntity().incSize(size);

            if(isText) {
                Charset cs = CharsetUtil.lookup(bd.getCharset());
                if(cs == null) {
                    cs = Charsets.Windows_1252;
                }

                String text = new String(textBuf, 0, size, cs);

                String textExt;
                if(bd.getSubType().equalsIgnoreCase("html")) {
                    Source html = new Source(text);
                    html.fullSequentialParse();
                    Element body = html.getFirstElement("body");
                    TextExtractor textExtractor = (body != null)
                            ? new TextExtractor(body)
                            : new TextExtractor(html);
                    textExt = textExtractor.toString();
                }
                else {
                    textExt = text.replaceAll("(" + CRLF + "|" + CR + "|" + LF + ")+", " ");
                }

                // Cut the preview at 300 chars, but never between the two halves
                // of a surrogate pair: a dangling high surrogate is not a valid
                // character and cannot be encoded when the preview is stored.
                int previewEnd = Math.min(textExt.length(), 300);
                if((previewEnd > 0) && (previewEnd < textExt.length())
                        && Character.isHighSurrogate(textExt.charAt(previewEnd - 1))
                        && Character.isLowSurrogate(textExt.charAt(previewEnd))) {
                    previewEnd--;
                }
                String preview = textExt.substring(0, previewEnd);

                ((TextBody)singleBody).setPreview(preview);

                addAnalyzedIndexField("body", textExt);
            }

            log.debug("Mime Parse Event: SINGLE BODY\n\n%s", new Supplier() {
                @Override
                public Object[] get() {
                    String domPath = singleBody.dumpPath();
                    return new Object[] { domPath };
                }
            });
        }

        /**
         * Ends the current body part: pops it from the entity stack and logs its
         * path dump at debug level.
         */
        @Override
        public void endBodyPart() {
            final BodyPart bodyPart = (BodyPart) entityStack.pop();

            log.debug("Mime Parse Event: END BODY PART\n\n%s", new Supplier() {
                @Override
                public Object[] get() {
                    String domPath = bodyPart.dumpPath();
                    return new Object[] { domPath };
                }
            });
        }

        /**
         * Hands the epilogue stream to the multipart currently on top of the
         * multipart stack (the multipart stays on the stack).
         *
         * @param is stream over the epilogue content
         */
        @Override
        public void epilogue(InputStream is) {
            Multipart multipart = multipartStack.peek();
            multipart.setEpilogueStream(is);
        }

        /**
         * Ends the current multipart: pops it from the multipart stack and logs
         * its path dump at debug level.
         */
        @Override
        public void endMultipart() {
            final Multipart multipart = multipartStack.pop();

            log.debug("Mime Parse Event: END MULTIPART\n\n%s", new Supplier() {
                @Override
                public Object[] get() {
                    String domPath = multipart.dumpPath();
                    return new Object[] { domPath };
                }
            });
        }

        /**
         * Ends the current message: pops the entity from the entity stack and logs
         * its path dump at debug level, labelled as an embedded message or as a
         * plain message depending on the popped entity's type. Entities of any
         * other type are popped without logging.
         */
        @Override
        public void endMessage() {
            final Entity entity = entityStack.pop();

            // NOTE(review): EmbeddedMessage is tested first; presumably it is a
            // subtype of Message, in which case the order matters - confirm.
            if(entity instanceof EmbeddedMessage) {
                final EmbeddedMessage embeddedMessage = (EmbeddedMessage) entity;

                log.debug("Mime Parse Event: END EMBEDDED MESSAGE\n\n%s", new Supplier() {
                    @Override
                    public Object[] get() {
                        String domPath = embeddedMessage.dumpPath();
                        return new Object[] { domPath };
                    }
                });
            }
            else if(entity instanceof Message) {
                final Message message = (Message) entity;

                log.debug("Mime Parse Event: END MESSAGE\n\n%s", new Supplier() {
                    @Override
                    public Object[] get() {
                        String domPath = message.dumpPath();
                        return new Object[] { domPath };
                    }
                });
            }
        }
    }
}