1 | /**************************************************************** |
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one * |
---|
3 | * or more contributor license agreements. See the NOTICE file * |
---|
4 | * distributed with this work for additional information * |
---|
5 | * regarding copyright ownership. The ASF licenses this file * |
---|
6 | * to you under the Apache License, Version 2.0 (the * |
---|
7 | * "License"); you may not use this file except in compliance * |
---|
8 | * with the License. You may obtain a copy of the License at * |
---|
9 | * * |
---|
10 | * http://www.apache.org/licenses/LICENSE-2.0 * |
---|
11 | * * |
---|
12 | * Unless required by applicable law or agreed to in writing, * |
---|
13 | * software distributed under the License is distributed on an * |
---|
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * |
---|
15 | * KIND, either express or implied. See the License for the * |
---|
16 | * specific language governing permissions and limitations * |
---|
17 | * under the License. * |
---|
18 | ****************************************************************/ |
---|
19 | |
---|
20 | package org.apache.james.mime4j.stream; |
---|
21 | |
---|
22 | import java.io.IOException; |
---|
23 | import java.io.InputStream; |
---|
24 | import java.io.InputStreamReader; |
---|
25 | import java.io.Reader; |
---|
26 | import java.nio.charset.Charset; |
---|
27 | import java.util.LinkedList; |
---|
28 | |
---|
29 | import org.apache.james.mime4j.MimeException; |
---|
30 | import org.apache.james.mime4j.codec.DecodeMonitor; |
---|
31 | import org.apache.james.mime4j.io.LineNumberInputStream; |
---|
32 | import org.apache.james.mime4j.io.LineNumberSource; |
---|
33 | import org.apache.james.mime4j.util.CharsetUtil; |
---|
34 | |
---|
35 | /** |
---|
36 | * <p> |
---|
37 | * Parses MIME (or RFC822) message streams of bytes or characters. |
---|
38 | * The stream is converted into an event stream. |
---|
39 | * <p> |
---|
40 | * <p> |
---|
41 | * Typical usage: |
---|
42 | * </p> |
---|
43 | * <pre> |
---|
44 | * MimeTokenStream stream = new MimeTokenStream(); |
---|
45 | * stream.parse(new FileInputStream("mime.msg")); |
---|
46 | * for (int state = stream.getState(); |
---|
47 | * state != MimeTokenStream.T_END_OF_STREAM; |
---|
48 | * state = stream.next()) { |
---|
49 | * switch (state) { |
---|
50 | * case MimeTokenStream.T_BODY: |
---|
51 | * System.out.println("Body detected, contents = " |
---|
52 | * + stream.getInputStream() + ", header data = " |
---|
53 | * + stream.getBodyDescriptor()); |
---|
54 | * break; |
---|
55 | * case MimeTokenStream.T_FIELD: |
---|
56 | * System.out.println("Header field detected: " |
---|
57 | * + stream.getField()); |
---|
58 | * break; |
---|
59 | * case MimeTokenStream.T_START_MULTIPART: |
---|
60 | * System.out.println("Multipart message detexted," |
---|
61 | * + " header data = " |
---|
62 | * + stream.getBodyDescriptor()); |
---|
63 | * ... |
---|
64 | * } |
---|
65 | * } |
---|
66 | * </pre> |
---|
67 | * <p>Instances of {@link MimeTokenStream} are reusable: Invoking the |
---|
68 | * method {@link #parse(InputStream)} resets the token streams internal |
---|
69 | * state. However, they are definitely <em>not</em> thread safe. If you |
---|
70 | * have a multi threaded application, then the suggested use is to have |
---|
71 | * one instance per thread.</p> |
---|
72 | */ |
---|
73 | public class MimeTokenStream { |
---|
74 | |
---|
75 | private final MimeEntityConfig config; |
---|
76 | private final DecodeMonitor monitor; |
---|
77 | private final FieldBuilder fieldBuilder; |
---|
78 | private final MutableBodyDescriptorFactory bodyDescFactory; |
---|
79 | private final LinkedList<EntityStateMachine> entities = new LinkedList<EntityStateMachine>(); |
---|
80 | |
---|
81 | private EntityState state = EntityState.T_END_OF_STREAM; |
---|
82 | private EntityStateMachine currentStateMachine; |
---|
83 | private RecursionMode recursionMode = RecursionMode.M_RECURSE; |
---|
84 | private MimeEntity rootentity; |
---|
85 | |
---|
86 | private LineNumberInputStream lineSource; |
---|
87 | |
---|
88 | /** |
---|
89 | * Constructs a standard (lax) stream. |
---|
90 | * Optional validation events will be logged only. |
---|
91 | * Use {@link MimeEntityConfig#setStrictParsing(boolean)} to turn on strict |
---|
92 | * parsing mode and pass the config object to |
---|
93 | * {@link MimeTokenStream#MimeTokenStream(MimeEntityConfig)} to create |
---|
94 | * a stream that strictly validates the input. |
---|
95 | */ |
---|
96 | public MimeTokenStream() { |
---|
97 | this(new MimeEntityConfig()); |
---|
98 | } |
---|
99 | |
---|
100 | public MimeTokenStream(final MimeEntityConfig config) { |
---|
101 | this(config, null, null, null); |
---|
102 | } |
---|
103 | |
---|
104 | public MimeTokenStream( |
---|
105 | final MimeEntityConfig config, |
---|
106 | final MutableBodyDescriptorFactory bodyDescFactory) { |
---|
107 | this(config, null, null, bodyDescFactory); |
---|
108 | } |
---|
109 | |
---|
110 | public MimeTokenStream( |
---|
111 | final MimeEntityConfig config, |
---|
112 | final DecodeMonitor monitor, |
---|
113 | final MutableBodyDescriptorFactory bodyDescFactory) { |
---|
114 | this(config, monitor, null, bodyDescFactory); |
---|
115 | } |
---|
116 | |
---|
117 | public MimeTokenStream( |
---|
118 | final MimeEntityConfig config, |
---|
119 | final DecodeMonitor monitor, |
---|
120 | final FieldBuilder fieldBuilder, |
---|
121 | final MutableBodyDescriptorFactory bodyDescFactory) { |
---|
122 | super(); |
---|
123 | this.config = config; |
---|
124 | this.fieldBuilder = fieldBuilder != null ? fieldBuilder : |
---|
125 | new DefaultFieldBuilder(config.getMaxHeaderLen()); |
---|
126 | this.monitor = monitor != null ? monitor : |
---|
127 | (config.isStrictParsing() ? DecodeMonitor.STRICT : DecodeMonitor.SILENT); |
---|
128 | this.bodyDescFactory = bodyDescFactory; |
---|
129 | } |
---|
130 | |
---|
131 | /** Instructs the {@code MimeTokenStream} to parse the given streams contents. |
---|
132 | * If the {@code MimeTokenStream} has already been in use, resets the streams |
---|
133 | * internal state. |
---|
134 | */ |
---|
135 | public void parse(InputStream stream) { |
---|
136 | doParse(stream, newBodyDescriptor(), EntityState.T_START_MESSAGE); |
---|
137 | } |
---|
138 | |
---|
139 | /** Instructs the {@code MimeTokenStream} to parse the given content with |
---|
140 | * the content type. The message stream is assumed to have no message header |
---|
141 | * and is expected to begin with a message body. This can be the case when |
---|
142 | * the message content is transmitted using a different transport protocol |
---|
143 | * such as HTTP. |
---|
144 | * <p/> |
---|
145 | * If the {@code MimeTokenStream} has already been in use, resets the streams |
---|
146 | * internal state. |
---|
147 | */ |
---|
148 | public void parseHeadless(InputStream stream, String contentType) { |
---|
149 | if (contentType == null) { |
---|
150 | throw new IllegalArgumentException("Content type may not be null"); |
---|
151 | } |
---|
152 | MutableBodyDescriptor newBodyDescriptor = newBodyDescriptor(); |
---|
153 | try { |
---|
154 | newBodyDescriptor.addField(new RawField("Content-Type", contentType)); |
---|
155 | } catch (MimeException ex) { |
---|
156 | // should never happen |
---|
157 | throw new IllegalArgumentException(ex.getMessage()); |
---|
158 | } |
---|
159 | doParse(stream, newBodyDescriptor, EntityState.T_END_HEADER); |
---|
160 | try { |
---|
161 | next(); |
---|
162 | } catch (IOException e) { |
---|
163 | // Should never happend: the first next after END_HEADER does not produce IO |
---|
164 | throw new IllegalStateException(e); |
---|
165 | } catch (MimeException e) { |
---|
166 | // This should never happen |
---|
167 | throw new IllegalStateException(e); |
---|
168 | } |
---|
169 | } |
---|
170 | |
---|
171 | /** |
---|
172 | * Creates a new instance of {@link BodyDescriptor}. Subclasses may override |
---|
173 | * this in order to create body descriptors, that provide more specific |
---|
174 | * information. |
---|
175 | */ |
---|
176 | protected MutableBodyDescriptor newBodyDescriptor() { |
---|
177 | final MutableBodyDescriptor result; |
---|
178 | if (bodyDescFactory != null) { |
---|
179 | result = bodyDescFactory.newInstance(monitor); |
---|
180 | } else { |
---|
181 | result = new DefaultBodyDescriptor(null, monitor); |
---|
182 | } |
---|
183 | return result; |
---|
184 | } |
---|
185 | |
---|
186 | public void doParse(InputStream stream, MutableBodyDescriptor newBodyDescriptor, EntityState start) { |
---|
187 | |
---|
188 | lineSource = new LineNumberInputStream(stream); |
---|
189 | stream = lineSource; |
---|
190 | |
---|
191 | rootentity = new MimeEntity( |
---|
192 | lineSource, |
---|
193 | stream, |
---|
194 | config, |
---|
195 | start, |
---|
196 | EntityState.T_END_MESSAGE, |
---|
197 | monitor, |
---|
198 | fieldBuilder, |
---|
199 | newBodyDescriptor); |
---|
200 | |
---|
201 | rootentity.setRecursionMode(recursionMode); |
---|
202 | currentStateMachine = rootentity; |
---|
203 | entities.clear(); |
---|
204 | entities.add(currentStateMachine); |
---|
205 | state = currentStateMachine.getState(); |
---|
206 | } |
---|
207 | |
---|
208 | public LineNumberInputStream.Entity getLineNumberRootEntity() { |
---|
209 | if(lineSource != null) { |
---|
210 | return lineSource.getRootEntity(); |
---|
211 | } |
---|
212 | return null; |
---|
213 | } |
---|
214 | |
---|
215 | /** |
---|
216 | * Determines if this parser is currently in raw mode. |
---|
217 | * |
---|
218 | * @return <code>true</code> if in raw mode, <code>false</code> |
---|
219 | * otherwise. |
---|
220 | * @see #setRecursionMode(int) |
---|
221 | */ |
---|
222 | public boolean isRaw() { |
---|
223 | return recursionMode == RecursionMode.M_RAW; |
---|
224 | } |
---|
225 | |
---|
226 | /** |
---|
227 | * Gets the current recursion mode. |
---|
228 | * The recursion mode specifies the approach taken to parsing parts. |
---|
229 | * {@link #M_RAW} mode does not parse the part at all. |
---|
230 | * {@link #M_RECURSE} mode recursively parses each mail |
---|
231 | * when an <code>message/rfc822</code> part is encounted; |
---|
232 | * {@link #M_NO_RECURSE} does not. |
---|
233 | * @return {@link #M_RECURSE}, {@link #M_RAW} or {@link #M_NO_RECURSE} |
---|
234 | */ |
---|
235 | public RecursionMode getRecursionMode() { |
---|
236 | return recursionMode; |
---|
237 | } |
---|
238 | |
---|
239 | /** |
---|
240 | * Sets the current recursion. |
---|
241 | * The recursion mode specifies the approach taken to parsing parts. |
---|
242 | * {@link #M_RAW} mode does not parse the part at all. |
---|
243 | * {@link #M_RECURSE} mode recursively parses each mail |
---|
244 | * when an <code>message/rfc822</code> part is encounted; |
---|
245 | * {@link #M_NO_RECURSE} does not. |
---|
246 | * @param mode {@link #M_RECURSE}, {@link #M_RAW} or {@link #M_NO_RECURSE} |
---|
247 | */ |
---|
248 | public void setRecursionMode(RecursionMode mode) { |
---|
249 | recursionMode = mode; |
---|
250 | if (currentStateMachine != null) { |
---|
251 | currentStateMachine.setRecursionMode(mode); |
---|
252 | } |
---|
253 | } |
---|
254 | |
---|
255 | /** |
---|
256 | * Finishes the parsing and stops reading lines. |
---|
257 | * NOTE: No more lines will be parsed but the parser |
---|
258 | * will still trigger 'end' events to match previously |
---|
259 | * triggered 'start' events. |
---|
260 | */ |
---|
261 | public void stop() { |
---|
262 | rootentity.stop(); |
---|
263 | } |
---|
264 | |
---|
265 | /** |
---|
266 | * Returns the current state. |
---|
267 | */ |
---|
268 | public EntityState getState() { |
---|
269 | return state; |
---|
270 | } |
---|
271 | |
---|
272 | /** |
---|
273 | * This method returns the raw entity, preamble, or epilogue contents. |
---|
274 | * <p/> |
---|
275 | * This method is valid, if {@link #getState()} returns either of |
---|
276 | * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, or {@link #T_EPILOGUE}. |
---|
277 | * |
---|
278 | * @return Data stream, depending on the current state. |
---|
279 | * @throws IllegalStateException {@link #getState()} returns an |
---|
280 | * invalid value. |
---|
281 | */ |
---|
282 | public InputStream getInputStream() { |
---|
283 | return currentStateMachine.getContentStream(); |
---|
284 | } |
---|
285 | |
---|
286 | /** |
---|
287 | * This method returns a transfer decoded stream based on the MIME |
---|
288 | * fields with the standard defaults. |
---|
289 | * <p/> |
---|
290 | * This method is valid, if {@link #getState()} returns either of |
---|
291 | * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, or {@link #T_EPILOGUE}. |
---|
292 | * |
---|
293 | * @return Data stream, depending on the current state. |
---|
294 | * @throws IllegalStateException {@link #getState()} returns an |
---|
295 | * invalid value. |
---|
296 | */ |
---|
297 | public InputStream getDecodedInputStream() { |
---|
298 | return currentStateMachine.getDecodedContentStream(); |
---|
299 | } |
---|
300 | |
---|
301 | /** |
---|
302 | * Gets a reader configured for the current body or body part. |
---|
303 | * The reader will return a transfer and charset decoded |
---|
304 | * stream of characters based on the MIME fields with the standard |
---|
305 | * defaults. |
---|
306 | * This is a conveniance method and relies on {@link #getInputStream()}. |
---|
307 | * Consult the javadoc for that method for known limitations. |
---|
308 | * |
---|
309 | * @return <code>Reader</code>, not null |
---|
310 | * @see #getInputStream |
---|
311 | * @throws IllegalStateException {@link #getState()} returns an |
---|
312 | * invalid value |
---|
313 | * @throws UnsupportedCharsetException if there is no JVM support |
---|
314 | * for decoding the charset |
---|
315 | * @throws IllegalCharsetNameException if the charset name specified |
---|
316 | * in the mime type is illegal |
---|
317 | */ |
---|
318 | public Reader getReader() { |
---|
319 | final BodyDescriptor bodyDescriptor = getBodyDescriptor(); |
---|
320 | final String mimeCharset = bodyDescriptor.getCharset(); |
---|
321 | final Charset charset; |
---|
322 | if (mimeCharset == null || "".equals(mimeCharset)) { |
---|
323 | charset = CharsetUtil.US_ASCII; |
---|
324 | } else { |
---|
325 | charset = Charset.forName(mimeCharset); |
---|
326 | } |
---|
327 | final InputStream instream = getDecodedInputStream(); |
---|
328 | return new InputStreamReader(instream, charset); |
---|
329 | } |
---|
330 | |
---|
331 | /** |
---|
332 | * <p>Gets a descriptor for the current entity. |
---|
333 | * This method is valid if {@link #getState()} returns:</p> |
---|
334 | * <ul> |
---|
335 | * <li>{@link #T_BODY}</li> |
---|
336 | * <li>{@link #T_START_MULTIPART}</li> |
---|
337 | * <li>{@link #T_EPILOGUE}</li> |
---|
338 | * <li>{@link #T_PREAMBLE}</li> |
---|
339 | * </ul> |
---|
340 | * @return <code>BodyDescriptor</code>, not nulls |
---|
341 | */ |
---|
342 | public BodyDescriptor getBodyDescriptor() { |
---|
343 | return currentStateMachine.getBodyDescriptor(); |
---|
344 | } |
---|
345 | |
---|
346 | /** |
---|
347 | * This method is valid, if {@link #getState()} returns {@link #T_FIELD}. |
---|
348 | * @return String with the fields raw contents. |
---|
349 | * @throws IllegalStateException {@link #getState()} returns another |
---|
350 | * value than {@link #T_FIELD}. |
---|
351 | */ |
---|
352 | public RawField getField() { |
---|
353 | return currentStateMachine.getField(); |
---|
354 | } |
---|
355 | |
---|
356 | /** |
---|
357 | * This method advances the token stream to the next token. |
---|
358 | * @throws IllegalStateException The method has been called, although |
---|
359 | * {@link #getState()} was already {@link #T_END_OF_STREAM}. |
---|
360 | */ |
---|
361 | public EntityState next() throws IOException, MimeException { |
---|
362 | if (state == EntityState.T_END_OF_STREAM || currentStateMachine == null) { |
---|
363 | throw new IllegalStateException("No more tokens are available."); |
---|
364 | } |
---|
365 | while (currentStateMachine != null) { |
---|
366 | EntityStateMachine next = currentStateMachine.advance(); |
---|
367 | if (next != null) { |
---|
368 | entities.add(next); |
---|
369 | currentStateMachine = next; |
---|
370 | } |
---|
371 | state = currentStateMachine.getState(); |
---|
372 | if (state != EntityState.T_END_OF_STREAM) { |
---|
373 | return state; |
---|
374 | } |
---|
375 | entities.removeLast(); |
---|
376 | if (entities.isEmpty()) { |
---|
377 | currentStateMachine = null; |
---|
378 | } else { |
---|
379 | currentStateMachine = entities.getLast(); |
---|
380 | currentStateMachine.setRecursionMode(recursionMode); |
---|
381 | } |
---|
382 | } |
---|
383 | state = EntityState.T_END_OF_STREAM; |
---|
384 | lineSource.endOfStream(); |
---|
385 | return state; |
---|
386 | } |
---|
387 | |
---|
388 | /** |
---|
389 | * Renders a state as a string suitable for logging. |
---|
390 | * @param state |
---|
391 | * @return rendered as string, not null |
---|
392 | */ |
---|
393 | public static final String stateToString(EntityState state) { |
---|
394 | return AbstractEntity.stateToString(state); |
---|
395 | } |
---|
396 | |
---|
397 | |
---|
398 | public MimeEntityConfig getConfig() { |
---|
399 | return config; |
---|
400 | } |
---|
401 | } |
---|