source: contrib/MailArchiver/sources/vendor/mime4j/apache-mime4j-0.7-SNAPSHOT-20110327.010440-17/core/src/main/java/org/apache/james/mime4j/stream/MimeTokenStream.java @ 6785

Revision 6785, 14.2 KB checked in by rafaelraymundo, 12 years ago (diff)

Ticket #2946 - Liberado codigo do MailArchiver?. Documentação na subpasta DOCS.

Line 
1/****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one   *
3 * or more contributor license agreements.  See the NOTICE file *
4 * distributed with this work for additional information        *
5 * regarding copyright ownership.  The ASF licenses this file   *
6 * to you under the Apache License, Version 2.0 (the            *
7 * "License"); you may not use this file except in compliance   *
8 * with the License.  You may obtain a copy of the License at   *
9 *                                                              *
10 *   http://www.apache.org/licenses/LICENSE-2.0                 *
11 *                                                              *
12 * Unless required by applicable law or agreed to in writing,   *
13 * software distributed under the License is distributed on an  *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15 * KIND, either express or implied.  See the License for the    *
16 * specific language governing permissions and limitations      *
17 * under the License.                                           *
18 ****************************************************************/
19
20package org.apache.james.mime4j.stream;
21
22import java.io.IOException;
23import java.io.InputStream;
24import java.io.InputStreamReader;
25import java.io.Reader;
26import java.nio.charset.Charset;
27import java.nio.charset.IllegalCharsetNameException;
28import java.nio.charset.UnsupportedCharsetException;
29import java.util.LinkedList;
30
31import org.apache.james.mime4j.MimeException;
32import org.apache.james.mime4j.codec.DecodeMonitor;
33import org.apache.james.mime4j.io.LineNumberInputStream;
34import org.apache.james.mime4j.io.LineNumberSource;
35import org.apache.james.mime4j.util.CharsetUtil;
36
37/**
38 * <p>
39 * Parses MIME (or RFC822) message streams of bytes or characters.
40 * The stream is converted into an event stream.
41 * <p>
42 * <p>
43 * Typical usage:
44 * </p>
45 * <pre>
46 *      MimeTokenStream stream = new MimeTokenStream();
47 *      stream.parse(new FileInputStream("mime.msg"));
48 *      for (int state = stream.getState();
49 *           state != MimeTokenStream.T_END_OF_STREAM;
50 *           state = stream.next()) {
51 *          switch (state) {
52 *            case MimeTokenStream.T_BODY:
53 *              System.out.println("Body detected, contents = "
54 *                + stream.getInputStream() + ", header data = "
55 *                + stream.getBodyDescriptor());
56 *              break;
57 *            case MimeTokenStream.T_FIELD:
58 *              System.out.println("Header field detected: "
59 *                + stream.getField());
60 *              break;
61 *            case MimeTokenStream.T_START_MULTIPART:
62 *              System.out.println("Multipart message detexted,"
63 *                + " header data = "
64 *                + stream.getBodyDescriptor());
65 *            ...
66 *          }
67 *      }
68 * </pre>
69 * <p>Instances of {@link MimeTokenStream} are reusable: Invoking the
70 * method {@link #parse(InputStream)} resets the token streams internal
71 * state. However, they are definitely <em>not</em> thread safe. If you
72 * have a multi threaded application, then the suggested use is to have
73 * one instance per thread.</p>
74 */
75public class MimeTokenStream {
76   
77    private final MimeEntityConfig config;
78    private final DecodeMonitor monitor;
79    private final MutableBodyDescriptorFactory bodyDescFactory;
80    private final LinkedList<EntityStateMachine> entities = new LinkedList<EntityStateMachine>();
81   
82    private EntityState state = EntityState.T_END_OF_STREAM;
83    private EntityStateMachine currentStateMachine;
84    private RecursionMode recursionMode = RecursionMode.M_RECURSE;
85    private MimeEntity rootentity;
86   
87    /**
88     * Constructs a standard (lax) stream.
89     * Optional validation events will be logged only.
90     * Use {@link MimeEntityConfig#setStrictParsing(boolean)} to turn on strict
91     * parsing mode and pass the config object to
92     * {@link MimeTokenStream#MimeTokenStream(MimeEntityConfig)} to create
93     * a stream that strictly validates the input.
94     */
95    public MimeTokenStream() {
96        this(new MimeEntityConfig());
97    }
98
99    public MimeTokenStream(final MimeEntityConfig config) {
100        this(config, null, null);
101    }
102       
103    public MimeTokenStream(
104            final MimeEntityConfig config,
105            final MutableBodyDescriptorFactory bodyDescFactory) {
106        this(config, bodyDescFactory, null);
107    }
108
109    public MimeTokenStream(
110            final MimeEntityConfig config,
111            final MutableBodyDescriptorFactory bodyDescFactory,
112            final DecodeMonitor monitor) {
113        super();
114        this.config = config;
115        this.monitor = monitor != null ? monitor :
116            (config.isStrictParsing() ? DecodeMonitor.STRICT : DecodeMonitor.SILENT);
117        this.bodyDescFactory = bodyDescFactory;
118    }
119
120    /** Instructs the {@code MimeTokenStream} to parse the given streams contents.
121     * If the {@code MimeTokenStream} has already been in use, resets the streams
122     * internal state.
123     */
124    public void parse(InputStream stream) {
125        doParse(stream, newBodyDescriptor(), EntityState.T_START_MESSAGE);
126    }
127
128    /** Instructs the {@code MimeTokenStream} to parse the given content with
129     * the content type. The message stream is assumed to have no message header
130     * and is expected to begin with a message body. This can be the case when
131     * the message content is transmitted using a different transport protocol
132     * such as HTTP.
133     * <p/>
134     * If the {@code MimeTokenStream} has already been in use, resets the streams
135     * internal state.
136     */   
137    public void parseHeadless(InputStream stream, String contentType) {
138        if (contentType == null) {
139            throw new IllegalArgumentException("Content type may not be null");
140        }
141        MutableBodyDescriptor newBodyDescriptor = newBodyDescriptor();
142        try {
143            newBodyDescriptor.addField(new RawField("Content-Type", contentType));
144        } catch (MimeException ex) {
145            // should never happen
146            throw new IllegalArgumentException(ex.getMessage());
147        }
148        doParse(stream, newBodyDescriptor, EntityState.T_END_HEADER);
149        try {
150            next();
151        } catch (IOException e) {
152            // Should never happend: the first next after END_HEADER does not produce IO
153            throw new IllegalStateException(e);
154        } catch (MimeException e) {
155            // This should never happen
156            throw new IllegalStateException(e);
157        }
158    }
159
160    /**
161     * Creates a new instance of {@link BodyDescriptor}. Subclasses may override
162     * this in order to create body descriptors, that provide more specific
163     * information.
164     */
165    protected MutableBodyDescriptor newBodyDescriptor() {
166        final MutableBodyDescriptor result;
167        if (bodyDescFactory != null) {
168            result = bodyDescFactory.newInstance(monitor);
169        } else {
170            result = new DefaultBodyDescriptor(null, monitor);
171        }
172        return result;
173    }
174
175    public void doParse(InputStream stream,
176            MutableBodyDescriptor newBodyDescriptor, EntityState start) {
177        LineNumberSource lineSource = null;
178        if (config.isCountLineNumbers()) {
179            LineNumberInputStream lineInput = new LineNumberInputStream(stream);
180            lineSource = lineInput;
181            stream = lineInput;
182        }
183
184        rootentity = new MimeEntity(
185                lineSource,
186                stream,
187                newBodyDescriptor,
188                start,
189                EntityState.T_END_MESSAGE,
190                config,
191                monitor);
192
193        rootentity.setRecursionMode(recursionMode);
194        currentStateMachine = rootentity;
195        entities.clear();
196        entities.add(currentStateMachine);
197        state = currentStateMachine.getState();
198    }
199
200    /**
201     * Determines if this parser is currently in raw mode.
202     *
203     * @return <code>true</code> if in raw mode, <code>false</code>
204     *         otherwise.
205     * @see #setRecursionMode(int)
206     */
207    public boolean isRaw() {
208        return recursionMode == RecursionMode.M_RAW;
209    }
210   
211    /**
212     * Gets the current recursion mode.
213     * The recursion mode specifies the approach taken to parsing parts.
214     * {@link #M_RAW}  mode does not parse the part at all.
215     * {@link #M_RECURSE} mode recursively parses each mail
216     * when an <code>message/rfc822</code> part is encounted;
217     * {@link #M_NO_RECURSE} does not.
218     * @return {@link #M_RECURSE}, {@link #M_RAW} or {@link #M_NO_RECURSE}
219     */
220    public RecursionMode getRecursionMode() {
221        return recursionMode;
222    }
223   
224    /**
225     * Sets the current recursion.
226     * The recursion mode specifies the approach taken to parsing parts.
227     * {@link #M_RAW}  mode does not parse the part at all.
228     * {@link #M_RECURSE} mode recursively parses each mail
229     * when an <code>message/rfc822</code> part is encounted;
230     * {@link #M_NO_RECURSE} does not.
231     * @param mode {@link #M_RECURSE}, {@link #M_RAW} or {@link #M_NO_RECURSE}
232     */
233    public void setRecursionMode(RecursionMode mode) {
234        recursionMode = mode;
235        if (currentStateMachine != null) {
236            currentStateMachine.setRecursionMode(mode);
237        }
238    }
239
240    /**
241     * Finishes the parsing and stops reading lines.
242     * NOTE: No more lines will be parsed but the parser
243     * will still trigger 'end' events to match previously
244     * triggered 'start' events.
245     */
246    public void stop() {
247        rootentity.stop();
248    }
249
250    /**
251     * Returns the current state.
252     */
253    public EntityState getState() {
254        return state;
255    }
256
257    /**
258     * This method returns the raw entity, preamble, or epilogue contents.
259     * <p/>
260     * This method is valid, if {@link #getState()} returns either of
261     * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, or {@link #T_EPILOGUE}.
262     *
263     * @return Data stream, depending on the current state.
264     * @throws IllegalStateException {@link #getState()} returns an
265     *   invalid value.
266     */
267    public InputStream getInputStream() {
268        return currentStateMachine.getContentStream();
269    }
270   
271    /**
272     * This method returns a transfer decoded stream based on the MIME
273     * fields with the standard defaults.
274     * <p/>
275     * This method is valid, if {@link #getState()} returns either of
276     * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, or {@link #T_EPILOGUE}.
277     *
278     * @return Data stream, depending on the current state.
279     * @throws IllegalStateException {@link #getState()} returns an
280     *   invalid value.
281     */
282    public InputStream getDecodedInputStream() {
283        return currentStateMachine.getDecodedContentStream();
284    }
285
286    /**
287     * Gets a reader configured for the current body or body part.
288     * The reader will return a transfer and charset decoded
289     * stream of characters based on the MIME fields with the standard
290     * defaults.
291     * This is a conveniance method and relies on {@link #getInputStream()}.
292     * Consult the javadoc for that method for known limitations.
293     *
294     * @return <code>Reader</code>, not null
295     * @see #getInputStream
296     * @throws IllegalStateException {@link #getState()} returns an
297     *   invalid value
298     * @throws UnsupportedCharsetException if there is no JVM support
299     * for decoding the charset
300     * @throws IllegalCharsetNameException if the charset name specified
301     * in the mime type is illegal
302     */
303    public Reader getReader() {
304        final BodyDescriptor bodyDescriptor = getBodyDescriptor();
305        final String mimeCharset = bodyDescriptor.getCharset();
306        final Charset charset;
307        if (mimeCharset == null || "".equals(mimeCharset)) {
308            charset = CharsetUtil.US_ASCII;
309        } else {
310            charset = Charset.forName(mimeCharset);
311        }
312        final InputStream instream = getDecodedInputStream();
313        return new InputStreamReader(instream, charset);
314    }
315   
316    /**
317     * <p>Gets a descriptor for the current entity.
318     * This method is valid if {@link #getState()} returns:</p>
319     * <ul>
320     * <li>{@link #T_BODY}</li>
321     * <li>{@link #T_START_MULTIPART}</li>
322     * <li>{@link #T_EPILOGUE}</li>
323     * <li>{@link #T_PREAMBLE}</li>
324     * </ul>
325     * @return <code>BodyDescriptor</code>, not nulls
326     */
327    public BodyDescriptor getBodyDescriptor() {
328        return currentStateMachine.getBodyDescriptor();
329    }
330
331    /**
332     * This method is valid, if {@link #getState()} returns {@link #T_FIELD}.
333     * @return String with the fields raw contents.
334     * @throws IllegalStateException {@link #getState()} returns another
335     *   value than {@link #T_FIELD}.
336     */
337    public RawField getField() {
338        return currentStateMachine.getField();
339    }
340   
341    /**
342     * This method advances the token stream to the next token.
343     * @throws IllegalStateException The method has been called, although
344     *   {@link #getState()} was already {@link #T_END_OF_STREAM}.
345     */
346    public EntityState next() throws IOException, MimeException {
347        if (state == EntityState.T_END_OF_STREAM  ||  currentStateMachine == null) {
348            throw new IllegalStateException("No more tokens are available.");
349        }
350        while (currentStateMachine != null) {
351            EntityStateMachine next = currentStateMachine.advance();
352            if (next != null) {
353                entities.add(next);
354                currentStateMachine = next;
355            }
356            state = currentStateMachine.getState();
357            if (state != EntityState.T_END_OF_STREAM) {
358                return state;
359            }
360            entities.removeLast();
361            if (entities.isEmpty()) {
362                currentStateMachine = null;
363            } else {
364                currentStateMachine = entities.getLast();
365                currentStateMachine.setRecursionMode(recursionMode);
366            }
367        }
368        state = EntityState.T_END_OF_STREAM;
369        return state;
370    }
371
372    /**
373     * Renders a state as a string suitable for logging.
374     * @param state
375     * @return rendered as string, not null
376     */
377    public static final String stateToString(EntityState state) {
378        return AbstractEntity.stateToString(state);
379    }
380
381
382    public MimeEntityConfig getConfig() {
383        return config;
384    }
385}
Note: See TracBrowser for help on using the repository browser.