source: contrib/MailArchiver/sources/vendor/mime4j/custom/core/src/main/java/org/apache/james/mime4j/stream/MimeTokenStream.java @ 6785

Revision 6785, 14.7 KB checked in by rafaelraymundo, 12 years ago (diff)

Ticket #2946 - Liberado codigo do MailArchiver?. Documentação na subpasta DOCS.

Line 
1/****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one   *
3 * or more contributor license agreements.  See the NOTICE file *
4 * distributed with this work for additional information        *
5 * regarding copyright ownership.  The ASF licenses this file   *
6 * to you under the Apache License, Version 2.0 (the            *
7 * "License"); you may not use this file except in compliance   *
8 * with the License.  You may obtain a copy of the License at   *
9 *                                                              *
10 *   http://www.apache.org/licenses/LICENSE-2.0                 *
11 *                                                              *
12 * Unless required by applicable law or agreed to in writing,   *
13 * software distributed under the License is distributed on an  *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15 * KIND, either express or implied.  See the License for the    *
16 * specific language governing permissions and limitations      *
17 * under the License.                                           *
18 ****************************************************************/
19
20package org.apache.james.mime4j.stream;
21
22import java.io.IOException;
23import java.io.InputStream;
24import java.io.InputStreamReader;
25import java.io.Reader;
26import java.nio.charset.Charset;
27import java.util.LinkedList;
28
29import org.apache.james.mime4j.MimeException;
30import org.apache.james.mime4j.codec.DecodeMonitor;
31import org.apache.james.mime4j.io.LineNumberInputStream;
32import org.apache.james.mime4j.io.LineNumberSource;
33import org.apache.james.mime4j.util.CharsetUtil;
34
35/**
36 * <p>
37 * Parses MIME (or RFC822) message streams of bytes or characters.
38 * The stream is converted into an event stream.
39 * <p>
40 * <p>
41 * Typical usage:
42 * </p>
43 * <pre>
44 *      MimeTokenStream stream = new MimeTokenStream();
45 *      stream.parse(new FileInputStream("mime.msg"));
46 *      for (int state = stream.getState();
47 *           state != MimeTokenStream.T_END_OF_STREAM;
48 *           state = stream.next()) {
49 *          switch (state) {
50 *            case MimeTokenStream.T_BODY:
51 *              System.out.println("Body detected, contents = "
52 *                + stream.getInputStream() + ", header data = "
53 *                + stream.getBodyDescriptor());
54 *              break;
55 *            case MimeTokenStream.T_FIELD:
56 *              System.out.println("Header field detected: "
57 *                + stream.getField());
58 *              break;
59 *            case MimeTokenStream.T_START_MULTIPART:
60 *              System.out.println("Multipart message detexted,"
61 *                + " header data = "
62 *                + stream.getBodyDescriptor());
63 *            ...
64 *          }
65 *      }
66 * </pre>
67 * <p>Instances of {@link MimeTokenStream} are reusable: Invoking the
68 * method {@link #parse(InputStream)} resets the token streams internal
69 * state. However, they are definitely <em>not</em> thread safe. If you
70 * have a multi threaded application, then the suggested use is to have
71 * one instance per thread.</p>
72 */
73public class MimeTokenStream {
74   
75    private final MimeEntityConfig config;
76    private final DecodeMonitor monitor;
77    private final FieldBuilder fieldBuilder;
78    private final MutableBodyDescriptorFactory bodyDescFactory;
79    private final LinkedList<EntityStateMachine> entities = new LinkedList<EntityStateMachine>();
80   
81    private EntityState state = EntityState.T_END_OF_STREAM;
82    private EntityStateMachine currentStateMachine;
83    private RecursionMode recursionMode = RecursionMode.M_RECURSE;
84    private MimeEntity rootentity;
85
86    private LineNumberInputStream lineSource;
87   
88    /**
89     * Constructs a standard (lax) stream.
90     * Optional validation events will be logged only.
91     * Use {@link MimeEntityConfig#setStrictParsing(boolean)} to turn on strict
92     * parsing mode and pass the config object to
93     * {@link MimeTokenStream#MimeTokenStream(MimeEntityConfig)} to create
94     * a stream that strictly validates the input.
95     */
96    public MimeTokenStream() {
97        this(new MimeEntityConfig());
98    }
99
100    public MimeTokenStream(final MimeEntityConfig config) {
101        this(config, null, null, null);
102    }
103       
104    public MimeTokenStream(
105            final MimeEntityConfig config,
106            final MutableBodyDescriptorFactory bodyDescFactory) {
107        this(config, null, null, bodyDescFactory);
108    }
109
110    public MimeTokenStream(
111            final MimeEntityConfig config,
112            final DecodeMonitor monitor,
113            final MutableBodyDescriptorFactory bodyDescFactory) {
114        this(config, monitor, null, bodyDescFactory);
115    }
116
117    public MimeTokenStream(
118            final MimeEntityConfig config,
119            final DecodeMonitor monitor,
120            final FieldBuilder fieldBuilder,
121            final MutableBodyDescriptorFactory bodyDescFactory) {
122        super();
123        this.config = config;
124        this.fieldBuilder = fieldBuilder != null ? fieldBuilder :
125            new DefaultFieldBuilder(config.getMaxHeaderLen());
126        this.monitor = monitor != null ? monitor :
127            (config.isStrictParsing() ? DecodeMonitor.STRICT : DecodeMonitor.SILENT);
128        this.bodyDescFactory = bodyDescFactory;
129    }
130
131    /** Instructs the {@code MimeTokenStream} to parse the given streams contents.
132     * If the {@code MimeTokenStream} has already been in use, resets the streams
133     * internal state.
134     */
135    public void parse(InputStream stream) {
136        doParse(stream, newBodyDescriptor(), EntityState.T_START_MESSAGE);
137    }
138
139    /** Instructs the {@code MimeTokenStream} to parse the given content with
140     * the content type. The message stream is assumed to have no message header
141     * and is expected to begin with a message body. This can be the case when
142     * the message content is transmitted using a different transport protocol
143     * such as HTTP.
144     * <p/>
145     * If the {@code MimeTokenStream} has already been in use, resets the streams
146     * internal state.
147     */   
148    public void parseHeadless(InputStream stream, String contentType) {
149        if (contentType == null) {
150            throw new IllegalArgumentException("Content type may not be null");
151        }
152        MutableBodyDescriptor newBodyDescriptor = newBodyDescriptor();
153        try {
154            newBodyDescriptor.addField(new RawField("Content-Type", contentType));
155        } catch (MimeException ex) {
156            // should never happen
157            throw new IllegalArgumentException(ex.getMessage());
158        }
159        doParse(stream, newBodyDescriptor, EntityState.T_END_HEADER);
160        try {
161            next();
162        } catch (IOException e) {
163            // Should never happend: the first next after END_HEADER does not produce IO
164            throw new IllegalStateException(e);
165        } catch (MimeException e) {
166            // This should never happen
167            throw new IllegalStateException(e);
168        }
169    }
170
171    /**
172     * Creates a new instance of {@link BodyDescriptor}. Subclasses may override
173     * this in order to create body descriptors, that provide more specific
174     * information.
175     */
176    protected MutableBodyDescriptor newBodyDescriptor() {
177        final MutableBodyDescriptor result;
178        if (bodyDescFactory != null) {
179            result = bodyDescFactory.newInstance(monitor);
180        } else {
181            result = new DefaultBodyDescriptor(null, monitor);
182        }
183        return result;
184    }
185
186    public void doParse(InputStream stream, MutableBodyDescriptor newBodyDescriptor, EntityState start) {
187       
188        lineSource = new LineNumberInputStream(stream);
189        stream = lineSource;
190
191        rootentity = new MimeEntity(
192                lineSource,
193                stream,
194                config,
195                start,
196                EntityState.T_END_MESSAGE,
197                monitor,
198                fieldBuilder,
199                newBodyDescriptor);
200
201        rootentity.setRecursionMode(recursionMode);
202        currentStateMachine = rootentity;
203        entities.clear();
204        entities.add(currentStateMachine);
205        state = currentStateMachine.getState();
206    }
207
208    public LineNumberInputStream.Entity getLineNumberRootEntity() {
209        if(lineSource != null) {           
210            return lineSource.getRootEntity();
211        }
212        return null;
213    }
214
215    /**
216     * Determines if this parser is currently in raw mode.
217     *
218     * @return <code>true</code> if in raw mode, <code>false</code>
219     *         otherwise.
220     * @see #setRecursionMode(int)
221     */
222    public boolean isRaw() {
223        return recursionMode == RecursionMode.M_RAW;
224    }
225   
226    /**
227     * Gets the current recursion mode.
228     * The recursion mode specifies the approach taken to parsing parts.
229     * {@link #M_RAW}  mode does not parse the part at all.
230     * {@link #M_RECURSE} mode recursively parses each mail
231     * when an <code>message/rfc822</code> part is encounted;
232     * {@link #M_NO_RECURSE} does not.
233     * @return {@link #M_RECURSE}, {@link #M_RAW} or {@link #M_NO_RECURSE}
234     */
235    public RecursionMode getRecursionMode() {
236        return recursionMode;
237    }
238   
239    /**
240     * Sets the current recursion.
241     * The recursion mode specifies the approach taken to parsing parts.
242     * {@link #M_RAW}  mode does not parse the part at all.
243     * {@link #M_RECURSE} mode recursively parses each mail
244     * when an <code>message/rfc822</code> part is encounted;
245     * {@link #M_NO_RECURSE} does not.
246     * @param mode {@link #M_RECURSE}, {@link #M_RAW} or {@link #M_NO_RECURSE}
247     */
248    public void setRecursionMode(RecursionMode mode) {
249        recursionMode = mode;
250        if (currentStateMachine != null) {
251            currentStateMachine.setRecursionMode(mode);
252        }
253    }
254
255    /**
256     * Finishes the parsing and stops reading lines.
257     * NOTE: No more lines will be parsed but the parser
258     * will still trigger 'end' events to match previously
259     * triggered 'start' events.
260     */
261    public void stop() {
262        rootentity.stop();
263    }
264
265    /**
266     * Returns the current state.
267     */
268    public EntityState getState() {
269        return state;
270    }
271
272    /**
273     * This method returns the raw entity, preamble, or epilogue contents.
274     * <p/>
275     * This method is valid, if {@link #getState()} returns either of
276     * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, or {@link #T_EPILOGUE}.
277     *
278     * @return Data stream, depending on the current state.
279     * @throws IllegalStateException {@link #getState()} returns an
280     *   invalid value.
281     */
282    public InputStream getInputStream() {
283        return currentStateMachine.getContentStream();
284    }
285   
286    /**
287     * This method returns a transfer decoded stream based on the MIME
288     * fields with the standard defaults.
289     * <p/>
290     * This method is valid, if {@link #getState()} returns either of
291     * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, or {@link #T_EPILOGUE}.
292     *
293     * @return Data stream, depending on the current state.
294     * @throws IllegalStateException {@link #getState()} returns an
295     *   invalid value.
296     */
297    public InputStream getDecodedInputStream() {
298        return currentStateMachine.getDecodedContentStream();
299    }
300
301    /**
302     * Gets a reader configured for the current body or body part.
303     * The reader will return a transfer and charset decoded
304     * stream of characters based on the MIME fields with the standard
305     * defaults.
306     * This is a conveniance method and relies on {@link #getInputStream()}.
307     * Consult the javadoc for that method for known limitations.
308     *
309     * @return <code>Reader</code>, not null
310     * @see #getInputStream
311     * @throws IllegalStateException {@link #getState()} returns an
312     *   invalid value
313     * @throws UnsupportedCharsetException if there is no JVM support
314     * for decoding the charset
315     * @throws IllegalCharsetNameException if the charset name specified
316     * in the mime type is illegal
317     */
318    public Reader getReader() {
319        final BodyDescriptor bodyDescriptor = getBodyDescriptor();
320        final String mimeCharset = bodyDescriptor.getCharset();
321        final Charset charset;
322        if (mimeCharset == null || "".equals(mimeCharset)) {
323            charset = CharsetUtil.US_ASCII;
324        } else {
325            charset = Charset.forName(mimeCharset);
326        }
327        final InputStream instream = getDecodedInputStream();
328        return new InputStreamReader(instream, charset);
329    }
330   
331    /**
332     * <p>Gets a descriptor for the current entity.
333     * This method is valid if {@link #getState()} returns:</p>
334     * <ul>
335     * <li>{@link #T_BODY}</li>
336     * <li>{@link #T_START_MULTIPART}</li>
337     * <li>{@link #T_EPILOGUE}</li>
338     * <li>{@link #T_PREAMBLE}</li>
339     * </ul>
340     * @return <code>BodyDescriptor</code>, not nulls
341     */
342    public BodyDescriptor getBodyDescriptor() {
343        return currentStateMachine.getBodyDescriptor();
344    }
345
346    /**
347     * This method is valid, if {@link #getState()} returns {@link #T_FIELD}.
348     * @return String with the fields raw contents.
349     * @throws IllegalStateException {@link #getState()} returns another
350     *   value than {@link #T_FIELD}.
351     */
352    public RawField getField() {
353        return currentStateMachine.getField();
354    }
355   
356    /**
357     * This method advances the token stream to the next token.
358     * @throws IllegalStateException The method has been called, although
359     *   {@link #getState()} was already {@link #T_END_OF_STREAM}.
360     */
361    public EntityState next() throws IOException, MimeException {
362        if (state == EntityState.T_END_OF_STREAM  ||  currentStateMachine == null) {
363            throw new IllegalStateException("No more tokens are available.");
364        }
365        while (currentStateMachine != null) {
366            EntityStateMachine next = currentStateMachine.advance();
367            if (next != null) {
368                entities.add(next);
369                currentStateMachine = next;
370            }
371            state = currentStateMachine.getState();
372            if (state != EntityState.T_END_OF_STREAM) {
373                return state;
374            }
375            entities.removeLast();
376            if (entities.isEmpty()) {
377                currentStateMachine = null;
378            } else {
379                currentStateMachine = entities.getLast();
380                currentStateMachine.setRecursionMode(recursionMode);
381            }
382        }
383        state = EntityState.T_END_OF_STREAM;
384        lineSource.endOfStream();
385        return state;
386    }
387
388    /**
389     * Renders a state as a string suitable for logging.
390     * @param state
391     * @return rendered as string, not null
392     */
393    public static final String stateToString(EntityState state) {
394        return AbstractEntity.stateToString(state);
395    }
396
397
398    public MimeEntityConfig getConfig() {
399        return config;
400    }
401}
Note: See TracBrowser for help on using the repository browser.