[6785] | 1 | /**************************************************************** |
---|
| 2 | * Licensed to the Apache Software Foundation (ASF) under one * |
---|
| 3 | * or more contributor license agreements. See the NOTICE file * |
---|
| 4 | * distributed with this work for additional information * |
---|
| 5 | * regarding copyright ownership. The ASF licenses this file * |
---|
| 6 | * to you under the Apache License, Version 2.0 (the * |
---|
| 7 | * "License"); you may not use this file except in compliance * |
---|
| 8 | * with the License. You may obtain a copy of the License at * |
---|
| 9 | * * |
---|
| 10 | * http://www.apache.org/licenses/LICENSE-2.0 * |
---|
| 11 | * * |
---|
| 12 | * Unless required by applicable law or agreed to in writing, * |
---|
| 13 | * software distributed under the License is distributed on an * |
---|
| 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * |
---|
| 15 | * KIND, either express or implied. See the License for the * |
---|
| 16 | * specific language governing permissions and limitations * |
---|
| 17 | * under the License. * |
---|
| 18 | ****************************************************************/ |
---|
| 19 | |
---|
| 20 | package org.apache.james.mime4j.stream; |
---|
| 21 | |
---|
| 22 | import java.io.IOException; |
---|
| 23 | import java.io.InputStream; |
---|
| 24 | import java.io.InputStreamReader; |
---|
| 25 | import java.io.Reader; |
---|
| 26 | import java.nio.charset.Charset; |
---|
| 27 | import java.nio.charset.IllegalCharsetNameException; |
---|
| 28 | import java.nio.charset.UnsupportedCharsetException; |
---|
| 29 | import java.util.LinkedList; |
---|
| 30 | |
---|
| 31 | import org.apache.james.mime4j.MimeException; |
---|
| 32 | import org.apache.james.mime4j.codec.DecodeMonitor; |
---|
| 33 | import org.apache.james.mime4j.io.LineNumberInputStream; |
---|
| 34 | import org.apache.james.mime4j.io.LineNumberSource; |
---|
| 35 | import org.apache.james.mime4j.util.CharsetUtil; |
---|
| 36 | |
---|
| 37 | /** |
---|
| 38 | * <p> |
---|
| 39 | * Parses MIME (or RFC822) message streams of bytes or characters. |
---|
| 40 | * The stream is converted into an event stream. |
---|
| 41 | * <p> |
---|
| 42 | * <p> |
---|
| 43 | * Typical usage: |
---|
| 44 | * </p> |
---|
| 45 | * <pre> |
---|
| 46 | * MimeTokenStream stream = new MimeTokenStream(); |
---|
| 47 | * stream.parse(new FileInputStream("mime.msg")); |
---|
| 48 | * for (int state = stream.getState(); |
---|
| 49 | * state != MimeTokenStream.T_END_OF_STREAM; |
---|
| 50 | * state = stream.next()) { |
---|
| 51 | * switch (state) { |
---|
| 52 | * case MimeTokenStream.T_BODY: |
---|
| 53 | * System.out.println("Body detected, contents = " |
---|
| 54 | * + stream.getInputStream() + ", header data = " |
---|
| 55 | * + stream.getBodyDescriptor()); |
---|
| 56 | * break; |
---|
| 57 | * case MimeTokenStream.T_FIELD: |
---|
| 58 | * System.out.println("Header field detected: " |
---|
| 59 | * + stream.getField()); |
---|
| 60 | * break; |
---|
| 61 | * case MimeTokenStream.T_START_MULTIPART: |
---|
| 62 | * System.out.println("Multipart message detexted," |
---|
| 63 | * + " header data = " |
---|
| 64 | * + stream.getBodyDescriptor()); |
---|
| 65 | * ... |
---|
| 66 | * } |
---|
| 67 | * } |
---|
| 68 | * </pre> |
---|
| 69 | * <p>Instances of {@link MimeTokenStream} are reusable: Invoking the |
---|
| 70 | * method {@link #parse(InputStream)} resets the token streams internal |
---|
| 71 | * state. However, they are definitely <em>not</em> thread safe. If you |
---|
| 72 | * have a multi threaded application, then the suggested use is to have |
---|
| 73 | * one instance per thread.</p> |
---|
| 74 | */ |
---|
| 75 | public class MimeTokenStream { |
---|
| 76 | |
---|
| 77 | private final MimeEntityConfig config; |
---|
| 78 | private final DecodeMonitor monitor; |
---|
| 79 | private final MutableBodyDescriptorFactory bodyDescFactory; |
---|
| 80 | private final LinkedList<EntityStateMachine> entities = new LinkedList<EntityStateMachine>(); |
---|
| 81 | |
---|
| 82 | private EntityState state = EntityState.T_END_OF_STREAM; |
---|
| 83 | private EntityStateMachine currentStateMachine; |
---|
| 84 | private RecursionMode recursionMode = RecursionMode.M_RECURSE; |
---|
| 85 | private MimeEntity rootentity; |
---|
| 86 | |
---|
| 87 | /** |
---|
| 88 | * Constructs a standard (lax) stream. |
---|
| 89 | * Optional validation events will be logged only. |
---|
| 90 | * Use {@link MimeEntityConfig#setStrictParsing(boolean)} to turn on strict |
---|
| 91 | * parsing mode and pass the config object to |
---|
| 92 | * {@link MimeTokenStream#MimeTokenStream(MimeEntityConfig)} to create |
---|
| 93 | * a stream that strictly validates the input. |
---|
| 94 | */ |
---|
| 95 | public MimeTokenStream() { |
---|
| 96 | this(new MimeEntityConfig()); |
---|
| 97 | } |
---|
| 98 | |
---|
| 99 | public MimeTokenStream(final MimeEntityConfig config) { |
---|
| 100 | this(config, null, null); |
---|
| 101 | } |
---|
| 102 | |
---|
| 103 | public MimeTokenStream( |
---|
| 104 | final MimeEntityConfig config, |
---|
| 105 | final MutableBodyDescriptorFactory bodyDescFactory) { |
---|
| 106 | this(config, bodyDescFactory, null); |
---|
| 107 | } |
---|
| 108 | |
---|
| 109 | public MimeTokenStream( |
---|
| 110 | final MimeEntityConfig config, |
---|
| 111 | final MutableBodyDescriptorFactory bodyDescFactory, |
---|
| 112 | final DecodeMonitor monitor) { |
---|
| 113 | super(); |
---|
| 114 | this.config = config; |
---|
| 115 | this.monitor = monitor != null ? monitor : |
---|
| 116 | (config.isStrictParsing() ? DecodeMonitor.STRICT : DecodeMonitor.SILENT); |
---|
| 117 | this.bodyDescFactory = bodyDescFactory; |
---|
| 118 | } |
---|
| 119 | |
---|
| 120 | /** Instructs the {@code MimeTokenStream} to parse the given streams contents. |
---|
| 121 | * If the {@code MimeTokenStream} has already been in use, resets the streams |
---|
| 122 | * internal state. |
---|
| 123 | */ |
---|
| 124 | public void parse(InputStream stream) { |
---|
| 125 | doParse(stream, newBodyDescriptor(), EntityState.T_START_MESSAGE); |
---|
| 126 | } |
---|
| 127 | |
---|
| 128 | /** Instructs the {@code MimeTokenStream} to parse the given content with |
---|
| 129 | * the content type. The message stream is assumed to have no message header |
---|
| 130 | * and is expected to begin with a message body. This can be the case when |
---|
| 131 | * the message content is transmitted using a different transport protocol |
---|
| 132 | * such as HTTP. |
---|
| 133 | * <p/> |
---|
| 134 | * If the {@code MimeTokenStream} has already been in use, resets the streams |
---|
| 135 | * internal state. |
---|
| 136 | */ |
---|
| 137 | public void parseHeadless(InputStream stream, String contentType) { |
---|
| 138 | if (contentType == null) { |
---|
| 139 | throw new IllegalArgumentException("Content type may not be null"); |
---|
| 140 | } |
---|
| 141 | MutableBodyDescriptor newBodyDescriptor = newBodyDescriptor(); |
---|
| 142 | try { |
---|
| 143 | newBodyDescriptor.addField(new RawField("Content-Type", contentType)); |
---|
| 144 | } catch (MimeException ex) { |
---|
| 145 | // should never happen |
---|
| 146 | throw new IllegalArgumentException(ex.getMessage()); |
---|
| 147 | } |
---|
| 148 | doParse(stream, newBodyDescriptor, EntityState.T_END_HEADER); |
---|
| 149 | try { |
---|
| 150 | next(); |
---|
| 151 | } catch (IOException e) { |
---|
| 152 | // Should never happend: the first next after END_HEADER does not produce IO |
---|
| 153 | throw new IllegalStateException(e); |
---|
| 154 | } catch (MimeException e) { |
---|
| 155 | // This should never happen |
---|
| 156 | throw new IllegalStateException(e); |
---|
| 157 | } |
---|
| 158 | } |
---|
| 159 | |
---|
| 160 | /** |
---|
| 161 | * Creates a new instance of {@link BodyDescriptor}. Subclasses may override |
---|
| 162 | * this in order to create body descriptors, that provide more specific |
---|
| 163 | * information. |
---|
| 164 | */ |
---|
| 165 | protected MutableBodyDescriptor newBodyDescriptor() { |
---|
| 166 | final MutableBodyDescriptor result; |
---|
| 167 | if (bodyDescFactory != null) { |
---|
| 168 | result = bodyDescFactory.newInstance(monitor); |
---|
| 169 | } else { |
---|
| 170 | result = new DefaultBodyDescriptor(null, monitor); |
---|
| 171 | } |
---|
| 172 | return result; |
---|
| 173 | } |
---|
| 174 | |
---|
| 175 | public void doParse(InputStream stream, |
---|
| 176 | MutableBodyDescriptor newBodyDescriptor, EntityState start) { |
---|
| 177 | LineNumberSource lineSource = null; |
---|
| 178 | if (config.isCountLineNumbers()) { |
---|
| 179 | LineNumberInputStream lineInput = new LineNumberInputStream(stream); |
---|
| 180 | lineSource = lineInput; |
---|
| 181 | stream = lineInput; |
---|
| 182 | } |
---|
| 183 | |
---|
| 184 | rootentity = new MimeEntity( |
---|
| 185 | lineSource, |
---|
| 186 | stream, |
---|
| 187 | newBodyDescriptor, |
---|
| 188 | start, |
---|
| 189 | EntityState.T_END_MESSAGE, |
---|
| 190 | config, |
---|
| 191 | monitor); |
---|
| 192 | |
---|
| 193 | rootentity.setRecursionMode(recursionMode); |
---|
| 194 | currentStateMachine = rootentity; |
---|
| 195 | entities.clear(); |
---|
| 196 | entities.add(currentStateMachine); |
---|
| 197 | state = currentStateMachine.getState(); |
---|
| 198 | } |
---|
| 199 | |
---|
| 200 | /** |
---|
| 201 | * Determines if this parser is currently in raw mode. |
---|
| 202 | * |
---|
| 203 | * @return <code>true</code> if in raw mode, <code>false</code> |
---|
| 204 | * otherwise. |
---|
| 205 | * @see #setRecursionMode(int) |
---|
| 206 | */ |
---|
| 207 | public boolean isRaw() { |
---|
| 208 | return recursionMode == RecursionMode.M_RAW; |
---|
| 209 | } |
---|
| 210 | |
---|
| 211 | /** |
---|
| 212 | * Gets the current recursion mode. |
---|
| 213 | * The recursion mode specifies the approach taken to parsing parts. |
---|
| 214 | * {@link #M_RAW} mode does not parse the part at all. |
---|
| 215 | * {@link #M_RECURSE} mode recursively parses each mail |
---|
| 216 | * when an <code>message/rfc822</code> part is encounted; |
---|
| 217 | * {@link #M_NO_RECURSE} does not. |
---|
| 218 | * @return {@link #M_RECURSE}, {@link #M_RAW} or {@link #M_NO_RECURSE} |
---|
| 219 | */ |
---|
| 220 | public RecursionMode getRecursionMode() { |
---|
| 221 | return recursionMode; |
---|
| 222 | } |
---|
| 223 | |
---|
| 224 | /** |
---|
| 225 | * Sets the current recursion. |
---|
| 226 | * The recursion mode specifies the approach taken to parsing parts. |
---|
| 227 | * {@link #M_RAW} mode does not parse the part at all. |
---|
| 228 | * {@link #M_RECURSE} mode recursively parses each mail |
---|
| 229 | * when an <code>message/rfc822</code> part is encounted; |
---|
| 230 | * {@link #M_NO_RECURSE} does not. |
---|
| 231 | * @param mode {@link #M_RECURSE}, {@link #M_RAW} or {@link #M_NO_RECURSE} |
---|
| 232 | */ |
---|
| 233 | public void setRecursionMode(RecursionMode mode) { |
---|
| 234 | recursionMode = mode; |
---|
| 235 | if (currentStateMachine != null) { |
---|
| 236 | currentStateMachine.setRecursionMode(mode); |
---|
| 237 | } |
---|
| 238 | } |
---|
| 239 | |
---|
| 240 | /** |
---|
| 241 | * Finishes the parsing and stops reading lines. |
---|
| 242 | * NOTE: No more lines will be parsed but the parser |
---|
| 243 | * will still trigger 'end' events to match previously |
---|
| 244 | * triggered 'start' events. |
---|
| 245 | */ |
---|
| 246 | public void stop() { |
---|
| 247 | rootentity.stop(); |
---|
| 248 | } |
---|
| 249 | |
---|
| 250 | /** |
---|
| 251 | * Returns the current state. |
---|
| 252 | */ |
---|
| 253 | public EntityState getState() { |
---|
| 254 | return state; |
---|
| 255 | } |
---|
| 256 | |
---|
| 257 | /** |
---|
| 258 | * This method returns the raw entity, preamble, or epilogue contents. |
---|
| 259 | * <p/> |
---|
| 260 | * This method is valid, if {@link #getState()} returns either of |
---|
| 261 | * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, or {@link #T_EPILOGUE}. |
---|
| 262 | * |
---|
| 263 | * @return Data stream, depending on the current state. |
---|
| 264 | * @throws IllegalStateException {@link #getState()} returns an |
---|
| 265 | * invalid value. |
---|
| 266 | */ |
---|
| 267 | public InputStream getInputStream() { |
---|
| 268 | return currentStateMachine.getContentStream(); |
---|
| 269 | } |
---|
| 270 | |
---|
| 271 | /** |
---|
| 272 | * This method returns a transfer decoded stream based on the MIME |
---|
| 273 | * fields with the standard defaults. |
---|
| 274 | * <p/> |
---|
| 275 | * This method is valid, if {@link #getState()} returns either of |
---|
| 276 | * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, or {@link #T_EPILOGUE}. |
---|
| 277 | * |
---|
| 278 | * @return Data stream, depending on the current state. |
---|
| 279 | * @throws IllegalStateException {@link #getState()} returns an |
---|
| 280 | * invalid value. |
---|
| 281 | */ |
---|
| 282 | public InputStream getDecodedInputStream() { |
---|
| 283 | return currentStateMachine.getDecodedContentStream(); |
---|
| 284 | } |
---|
| 285 | |
---|
| 286 | /** |
---|
| 287 | * Gets a reader configured for the current body or body part. |
---|
| 288 | * The reader will return a transfer and charset decoded |
---|
| 289 | * stream of characters based on the MIME fields with the standard |
---|
| 290 | * defaults. |
---|
| 291 | * This is a conveniance method and relies on {@link #getInputStream()}. |
---|
| 292 | * Consult the javadoc for that method for known limitations. |
---|
| 293 | * |
---|
| 294 | * @return <code>Reader</code>, not null |
---|
| 295 | * @see #getInputStream |
---|
| 296 | * @throws IllegalStateException {@link #getState()} returns an |
---|
| 297 | * invalid value |
---|
| 298 | * @throws UnsupportedCharsetException if there is no JVM support |
---|
| 299 | * for decoding the charset |
---|
| 300 | * @throws IllegalCharsetNameException if the charset name specified |
---|
| 301 | * in the mime type is illegal |
---|
| 302 | */ |
---|
| 303 | public Reader getReader() { |
---|
| 304 | final BodyDescriptor bodyDescriptor = getBodyDescriptor(); |
---|
| 305 | final String mimeCharset = bodyDescriptor.getCharset(); |
---|
| 306 | final Charset charset; |
---|
| 307 | if (mimeCharset == null || "".equals(mimeCharset)) { |
---|
| 308 | charset = CharsetUtil.US_ASCII; |
---|
| 309 | } else { |
---|
| 310 | charset = Charset.forName(mimeCharset); |
---|
| 311 | } |
---|
| 312 | final InputStream instream = getDecodedInputStream(); |
---|
| 313 | return new InputStreamReader(instream, charset); |
---|
| 314 | } |
---|
| 315 | |
---|
| 316 | /** |
---|
| 317 | * <p>Gets a descriptor for the current entity. |
---|
| 318 | * This method is valid if {@link #getState()} returns:</p> |
---|
| 319 | * <ul> |
---|
| 320 | * <li>{@link #T_BODY}</li> |
---|
| 321 | * <li>{@link #T_START_MULTIPART}</li> |
---|
| 322 | * <li>{@link #T_EPILOGUE}</li> |
---|
| 323 | * <li>{@link #T_PREAMBLE}</li> |
---|
| 324 | * </ul> |
---|
| 325 | * @return <code>BodyDescriptor</code>, not nulls |
---|
| 326 | */ |
---|
| 327 | public BodyDescriptor getBodyDescriptor() { |
---|
| 328 | return currentStateMachine.getBodyDescriptor(); |
---|
| 329 | } |
---|
| 330 | |
---|
| 331 | /** |
---|
| 332 | * This method is valid, if {@link #getState()} returns {@link #T_FIELD}. |
---|
| 333 | * @return String with the fields raw contents. |
---|
| 334 | * @throws IllegalStateException {@link #getState()} returns another |
---|
| 335 | * value than {@link #T_FIELD}. |
---|
| 336 | */ |
---|
| 337 | public RawField getField() { |
---|
| 338 | return currentStateMachine.getField(); |
---|
| 339 | } |
---|
| 340 | |
---|
| 341 | /** |
---|
| 342 | * This method advances the token stream to the next token. |
---|
| 343 | * @throws IllegalStateException The method has been called, although |
---|
| 344 | * {@link #getState()} was already {@link #T_END_OF_STREAM}. |
---|
| 345 | */ |
---|
| 346 | public EntityState next() throws IOException, MimeException { |
---|
| 347 | if (state == EntityState.T_END_OF_STREAM || currentStateMachine == null) { |
---|
| 348 | throw new IllegalStateException("No more tokens are available."); |
---|
| 349 | } |
---|
| 350 | while (currentStateMachine != null) { |
---|
| 351 | EntityStateMachine next = currentStateMachine.advance(); |
---|
| 352 | if (next != null) { |
---|
| 353 | entities.add(next); |
---|
| 354 | currentStateMachine = next; |
---|
| 355 | } |
---|
| 356 | state = currentStateMachine.getState(); |
---|
| 357 | if (state != EntityState.T_END_OF_STREAM) { |
---|
| 358 | return state; |
---|
| 359 | } |
---|
| 360 | entities.removeLast(); |
---|
| 361 | if (entities.isEmpty()) { |
---|
| 362 | currentStateMachine = null; |
---|
| 363 | } else { |
---|
| 364 | currentStateMachine = entities.getLast(); |
---|
| 365 | currentStateMachine.setRecursionMode(recursionMode); |
---|
| 366 | } |
---|
| 367 | } |
---|
| 368 | state = EntityState.T_END_OF_STREAM; |
---|
| 369 | return state; |
---|
| 370 | } |
---|
| 371 | |
---|
| 372 | /** |
---|
| 373 | * Renders a state as a string suitable for logging. |
---|
| 374 | * @param state |
---|
| 375 | * @return rendered as string, not null |
---|
| 376 | */ |
---|
| 377 | public static final String stateToString(EntityState state) { |
---|
| 378 | return AbstractEntity.stateToString(state); |
---|
| 379 | } |
---|
| 380 | |
---|
| 381 | |
---|
| 382 | public MimeEntityConfig getConfig() { |
---|
| 383 | return config; |
---|
| 384 | } |
---|
| 385 | } |
---|