package pkg.crawler.tread; import java.io.IOException; import java.util.ArrayList; import java.util.Date; import java.util.List; import javax.mail.Address; import javax.mail.BodyPart; import javax.mail.Folder; import javax.mail.Message; import javax.mail.MessagingException; import javax.mail.Multipart; import javax.mail.Part; import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.common.SolrInputDocument; import pkg.crawler.entity.Message2SolrEntity; import com.sun.mail.imap.ACL; import com.sun.mail.imap.IMAPFolder; import com.sun.mail.imap.IMAPMessage; import com.sun.mail.imap.Rights; import com.sun.mail.imap.Rights.Right; public class CrawlerThread implements Runnable{ private String hostSolr; private ACL acl; private IMAPFolder imapFINBOXAux; /** * @param args */ public static void main(String[] args) { // TODO Auto-generated method stub } public CrawlerThread(IMAPFolder imapFINBOXAux, String hostSolr) { this.hostSolr = hostSolr; this.imapFINBOXAux = imapFINBOXAux; //Cria a ACL a ser configurada acl = new ACL("expresso-admin"); //Define direito de leitura Rights rights = new Rights(); rights.add(Right.READ); //Configura direito de leitura para a ACL acl.setRights(rights); } @Override public void run() { try { Thread.sleep(5000); } catch (InterruptedException e1) { // TODO Auto-generated catch block e1.printStackTrace(); } //Cria uma instância de conexão com o servidor Solr SolrServer solrServer = new HttpSolrServer(hostSolr); //Cria List listIMAPMsgs = new ArrayList(); //INBOX //Adiciona ACL de permissão de leitura, para o usuário expresso-admin conseguir fazer a leitura try { imapFINBOXAux.addACL(acl); //Abre a caixa de e-mail imapFINBOXAux.open(Folder.READ_ONLY); //Carrega as mensagens da caixa para um array Message[] msgsINBOX = imapFINBOXAux.getMessages(); //Faz a iteração entre as mensagens for (Message msgAuxINBOX : msgsINBOX) { IMAPMessage m = (IMAPMessage)msgAuxINBOX; //adiciona as mensagens na varíavel listIMAPMsgs try { dumpPart(m, listIMAPMsgs); }catch(Exception e) { continue; } } imapFINBOXAux.removeACL("expresso-admin"); for(int i = 0; i < listIMAPMsgs.size(); i++ ) { SolrInputDocument doc = new SolrInputDocument(); doc.addField("id", listIMAPMsgs.get(i).getId().toString()); doc.addField("user", listIMAPMsgs.get(i).getUser().toString()); doc.addField("folder", listIMAPMsgs.get(i).getFolder().toString()); doc.addField("msg_no", listIMAPMsgs.get(i).getMsgNo()); doc.addField("from", listIMAPMsgs.get(i).getFrom().toString()); doc.addField("to", listIMAPMsgs.get(i).getTo().toString()); doc.addField("subject", listIMAPMsgs.get(i).getSubject().toString()); doc.addField("content", listIMAPMsgs.get(i).getContent().toString()); doc.addField("copyto", listIMAPMsgs.get(i).getCopyto().toString()); doc.addField("sent_date", listIMAPMsgs.get(i).getSent_date()); doc.addField("hiddencopyto", listIMAPMsgs.get(i).getHiddencopyto().toString()); try { solrServer.add(doc); } catch (Exception e) { System.err.println("solr -> " + e.getMessage()); } } listIMAPMsgs = null; solrServer.commit(); solrServer = null; for (Folder fAux : imapFINBOXAux.list()) { //Verifica se não é uma pasta compartilhada if(fAux.getFullName().split("/").length <= 3 && !fAux.getFullName().split("/")[2].equals("user")) { //INBOX IMAPFolder imapFAux = (IMAPFolder)fAux; crawIntoUserFolders(imapFAux); } } } catch (Exception e) { //do nothing } } private void crawIntoUserFolders(IMAPFolder imapFAux) throws Exception { SolrServer solrServer = new HttpSolrServer(hostSolr); List listIMAPMsgs = new ArrayList(); try { imapFAux.addACL(acl); }catch(Exception e) { return; } imapFAux.open(Folder.READ_ONLY); Message[] msgs= imapFAux.getMessages(); for (Message msgAux: msgs) { IMAPMessage m = (IMAPMessage)msgAux; try { dumpPart(m, listIMAPMsgs); }catch(Exception e) { continue; } } imapFAux.removeACL("expresso-admin"); System.out.println(imapFAux.getFullName()); for(int i = 0; i < listIMAPMsgs.size(); i++ ) { SolrInputDocument doc = new SolrInputDocument(); doc.addField("id", listIMAPMsgs.get(i).getId()); doc.addField("user", listIMAPMsgs.get(i).getUser()); doc.addField("folder", listIMAPMsgs.get(i).getFolder()); doc.addField("msg_no", listIMAPMsgs.get(i).getMsgNo()); doc.addField("from", listIMAPMsgs.get(i).getFrom()); doc.addField("to", listIMAPMsgs.get(i).getTo()); doc.addField("subject", listIMAPMsgs.get(i).getSubject()); doc.addField("content", listIMAPMsgs.get(i).getContent()); doc.addField("copyto", listIMAPMsgs.get(i).getCopyto()); doc.addField("sent_date", listIMAPMsgs.get(i).getSent_date()); doc.addField("hiddencopyto", listIMAPMsgs.get(i).getHiddencopyto()); } listIMAPMsgs = null; solrServer.commit(); solrServer = null; } public void dumpPart(IMAPMessage m, List listIMAPMsgs) throws Exception { //Verifica se possui identificador para poder continuar if(m.getMessageID() == null || m.getMessageID().equals("")){ return; } Message2SolrEntity msgEntity = null; msgEntity = new Message2SolrEntity(); msgEntity.setId(new StringBuilder( m.getMessageID())); String user = m.getFolder().getFullName().split("/")[1]; msgEntity.setUser(new StringBuilder(user)); String folder = m.getFolder().getFullName().split("/") [m.getFolder().getFullName().split("/").length-1]; if(!user.trim().equals(folder.trim())) { msgEntity.setFolder(new StringBuilder(folder)); } else { msgEntity.setFolder(new StringBuilder("INBOX")); } msgEntity.setMsgNo(new StringBuilder(String.valueOf(m.getMessageNumber()))); if(m.getSubject() != null){ msgEntity.setSubject(new StringBuilder( m.getSubject() )); } Address[] a; // FROM if ((a = m.getFrom()) != null) { for (int j = 0; j < a.length; j++) { if(msgEntity.getFrom() == null) { msgEntity.setFrom(new StringBuilder(a[j].toString())); } else { msgEntity.setFrom(msgEntity.getFrom().append(", ").append(a[j].toString())); } // System.out.println("FROM: " + a[j].toString()); } } // TO if ((a = m.getRecipients(Message.RecipientType.TO)) != null) { for (int j = 0; j < a.length; j++) { if(msgEntity.getTo() == null) { msgEntity.setTo(new StringBuilder(a[j].toString())); } else { msgEntity.setTo(msgEntity.getTo().append(", ").append(a[j].toString())); } } } // CC if ((a = m.getRecipients(Message.RecipientType.CC)) != null) { for (int j = 0; j < a.length; j++) { if(msgEntity.getCopyto() == null) { msgEntity.setCopyto(new StringBuilder(a[j].toString())); } else { msgEntity.setCopyto(msgEntity.getCopyto().append(", ").append(a[j].toString())); } } } // CC if ((a = m.getRecipients(Message.RecipientType.BCC)) != null) { for (int j = 0; j < a.length; j++) { if(msgEntity.getHiddencopyto() == null) { msgEntity.setHiddencopyto(new StringBuilder(a[j].toString())); } else { msgEntity.setHiddencopyto(new StringBuilder(msgEntity.getHiddencopyto() + ","+a[j].toString())); } } } // DATE Date d = m.getSentDate(); if(msgEntity != null) { if(d != null) { msgEntity.setSent_date(new StringBuilder(String.valueOf(d.getTime()))); } else { msgEntity.setSent_date(new StringBuilder(String.valueOf(new Date().getTime()))); } } try{ Object o = m.getContent(); if (o instanceof String) { if( msgEntity != null ) { msgEntity.setContent(new StringBuilder( o.toString())); } } else if (o instanceof Multipart) { Multipart multipart = (Multipart)o; int count = multipart.getCount(); for (int i = 0; i < count; i++) { if(msgEntity.getContent() == null || msgEntity.getContent().toString().trim().equals("")) { msgEntity.setContent(new StringBuilder("")); } if(multipart.getBodyPart(i).getContentType() != Part.ATTACHMENT) { msgEntity.setContent(msgEntity.getContent().append(" ").append(getPlainContent(multipart.getBodyPart(i)))); } } } }catch (Exception e){ System.out.println("Exception ocurred!"); e.printStackTrace(); } if(msgEntity != null && msgEntity.getId() != null){ listIMAPMsgs.add(msgEntity); } } private StringBuilder getPlainContent(BodyPart bodyPart) throws IOException, MessagingException { try { if(bodyPart.getContent() instanceof String) { // ByteArrayOutputStream out = new ByteArrayOutputStream(); // InputStream in = bodyPart.getInputStream(); // // byte[] buffer = new byte[1024]; // byte[] data = null; // // while(in.read(buffer) != -1) { // out.write(buffer); // } // // data = out.toByteArray(); StringBuilder text = new StringBuilder(bodyPart.getContent().toString()); // out.close(); // in.close(); return text; } }catch(Exception e) { System.out.println(e.getMessage()); e.printStackTrace(); return new StringBuilder(""); } return new StringBuilder(""); } }