source: contrib/MailArchiver/sources/vendor/mime4j/custom/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java @ 6785

Revision 6785, 9.7 KB checked in by rafaelraymundo, 12 years ago (diff)

Ticket #2946 - Liberado codigo do MailArchiver?. Documentação na subpasta DOCS.

Line 
1/****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one   *
3 * or more contributor license agreements.  See the NOTICE file *
4 * distributed with this work for additional information        *
5 * regarding copyright ownership.  The ASF licenses this file   *
6 * to you under the Apache License, Version 2.0 (the            *
7 * "License"); you may not use this file except in compliance   *
8 * with the License.  You may obtain a copy of the License at   *
9 *                                                              *
10 *   http://www.apache.org/licenses/LICENSE-2.0                 *
11 *                                                              *
12 * Unless required by applicable law or agreed to in writing,   *
13 * software distributed under the License is distributed on an  *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15 * KIND, either express or implied.  See the License for the    *
16 * specific language governing permissions and limitations      *
17 * under the License.                                           *
18 ****************************************************************/
19
20package org.apache.james.mime4j.codec;
21
22import java.io.ByteArrayInputStream;
23import java.io.ByteArrayOutputStream;
24import java.io.IOException;
25import java.io.UnsupportedEncodingException;
26import java.nio.charset.Charset;
27import java.util.regex.Matcher;
28import java.util.regex.Pattern;
29
30import org.apache.james.mime4j.util.CharsetUtil;
31
32/**
33 * Static methods for decoding strings, byte arrays and encoded words.
34 */
35public class DecoderUtil {
36
37    private static final Pattern PATTERN_ENCODED_WORD = Pattern.compile(
38            "(.*?)=\\?([^\\?]+?)\\?(\\w)\\?([^\\?]+?)\\?=", Pattern.DOTALL);
39
40    /**
41     * Decodes a string containing quoted-printable encoded data.
42     *
43     * @param s the string to decode.
44     * @return the decoded bytes.
45     */
46    private static byte[] decodeQuotedPrintable(String s, DecodeMonitor monitor) {
47        ByteArrayOutputStream baos = new ByteArrayOutputStream();
48       
49        try {
50            byte[] bytes = s.getBytes("US-ASCII");
51           
52            QuotedPrintableInputStream is = new QuotedPrintableInputStream(
53                                               new ByteArrayInputStream(bytes), monitor);
54           
55            int b = 0;
56            while ((b = is.read()) != -1) {
57                baos.write(b);
58            }
59        } catch (IOException e) {
60            // This should never happen!
61            throw new IllegalStateException(e);
62        }
63       
64        return baos.toByteArray();
65    }
66   
67    /**
68     * Decodes a string containing base64 encoded data.
69     *
70     * @param s the string to decode.
71     * @param monitor
72     * @return the decoded bytes.
73     */
74    private static byte[] decodeBase64(String s, DecodeMonitor monitor) {
75        ByteArrayOutputStream baos = new ByteArrayOutputStream();
76       
77        try {
78            byte[] bytes = s.getBytes("US-ASCII");
79           
80            Base64InputStream is = new Base64InputStream(
81                                        new ByteArrayInputStream(bytes), monitor);
82           
83            int b = 0;
84            while ((b = is.read()) != -1) {
85                baos.write(b);
86            }
87        } catch (IOException e) {
88            // This should never happen!
89            throw new IllegalStateException(e);
90        }
91       
92        return baos.toByteArray();
93    }
94   
95    /**
96     * Decodes an encoded text encoded with the 'B' encoding (described in
97     * RFC 2047) found in a header field body.
98     *
99     * @param encodedText the encoded text to decode.
100     * @param charset the Java charset to use.
101     * @param monitor
102     * @return the decoded string.
103     * @throws UnsupportedEncodingException if the given Java charset isn't
104     *         supported.
105     */
106    static String decodeB(String encodedText, String charset, DecodeMonitor monitor)
107            throws UnsupportedEncodingException {
108        byte[] decodedBytes = decodeBase64(encodedText, monitor);
109        return new String(decodedBytes, charset);
110    }
111   
112    /**
113     * Decodes an encoded text encoded with the 'Q' encoding (described in
114     * RFC 2047) found in a header field body.
115     *
116     * @param encodedText the encoded text to decode.
117     * @param charset the Java charset to use.
118     * @return the decoded string.
119     * @throws UnsupportedEncodingException if the given Java charset isn't
120     *         supported.
121     */
122    static String decodeQ(String encodedText, String charset, DecodeMonitor monitor)
123            throws UnsupportedEncodingException {
124        encodedText = replaceUnderscores(encodedText);
125       
126        byte[] decodedBytes = decodeQuotedPrintable(encodedText, monitor);
127        return new String(decodedBytes, charset);
128    }
129
130    static String decodeEncodedWords(String body)  {
131        return decodeEncodedWords(body, DecodeMonitor.SILENT);
132    }
133
134    /**
135     * Decodes a string containing encoded words as defined by RFC 2047. Encoded
136     * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
137     * or 'q' for quoted-printable and 'B' or 'b' for base64.
138     *
139     * @param body the string to decode
140     * @param monitor the DecodeMonitor to be used.
141     * @return the decoded string.
142     * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing)
143     */
144    public static String decodeEncodedWords(String body, DecodeMonitor monitor) throws IllegalArgumentException {
145        int tailIndex = 0;
146        boolean lastMatchValid = false;
147
148        StringBuilder sb = new StringBuilder();
149
150        for (Matcher matcher = PATTERN_ENCODED_WORD.matcher(body); matcher.find();) {
151            String separator = matcher.group(1);
152            String mimeCharset = matcher.group(2);
153            String encoding = matcher.group(3);
154            String encodedText = matcher.group(4);
155
156            String decoded = null;
157            decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor);
158            if (decoded == null) {
159                sb.append(matcher.group(0));
160            } else {
161                if (!lastMatchValid || !CharsetUtil.isWhitespace(separator)) {
162                    sb.append(separator);
163                }
164                sb.append(decoded);
165            }
166
167            tailIndex = matcher.end();
168            lastMatchValid = decoded != null;
169        }
170
171        if (tailIndex == 0) {
172            return body;
173        } else {
174            sb.append(body.substring(tailIndex));
175            return sb.toString();
176        }
177    }
178
179    // return null on error
180    private static String tryDecodeEncodedWord(final String mimeCharset,
181            final String encoding, final String encodedText, final DecodeMonitor monitor) {
182        Charset charset = CharsetUtil.lookup(mimeCharset);
183        if (charset == null) {
184            monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
185                    "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset");
186            return null;
187        }
188
189        if (encodedText.length() == 0) {
190            monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
191                    "Missing encoded text in encoded word");
192            return null;
193        }
194
195        try {
196            if (encoding.equalsIgnoreCase("Q")) {
197                return DecoderUtil.decodeQ(encodedText, charset.name(), monitor);
198            } else if (encoding.equalsIgnoreCase("B")) {
199                return DecoderUtil.decodeB(encodedText, charset.name(), monitor);
200            } else {
201                monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
202                        "Warning: Unknown encoding in encoded word");
203                return null;
204            }
205        } catch (UnsupportedEncodingException e) {
206            // should not happen because of isDecodingSupported check above
207            monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
208                    "Unsupported encoding (", e.getMessage(), ") in encoded word");
209            return null;
210        } catch (RuntimeException e) {
211            monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
212                    "Could not decode (", e.getMessage(), ") encoded word");
213            return null;
214        }
215    }
216
217    private static void monitor(DecodeMonitor monitor, String mimeCharset, String encoding,
218            String encodedText, String dropDesc, String... strings) throws IllegalArgumentException {
219        if (monitor.isListening()) {
220            String encodedWord = recombine(mimeCharset, encoding, encodedText);
221            StringBuilder text = new StringBuilder();
222            for (String str : strings) {
223                text.append(str);
224            }
225            text.append(" (");
226            text.append(encodedWord);
227            text.append(")");
228            String exceptionDesc = text.toString();
229            if (monitor.warn(exceptionDesc, dropDesc))
230                throw new IllegalArgumentException(text.toString());
231        }
232    }
233
234    private static String recombine(final String mimeCharset,
235            final String encoding, final String encodedText) {
236        return "=?" + mimeCharset + "?" + encoding + "?" + encodedText + "?=";
237    }
238
239    // Replace _ with =20
240    private static String replaceUnderscores(String str) {
241        // probably faster than String#replace(CharSequence, CharSequence)
242
243        StringBuilder sb = new StringBuilder(128);
244
245        for (int i = 0; i < str.length(); i++) {
246            char c = str.charAt(i);
247            if (c == '_') {
248                sb.append("=20");
249            } else {
250                sb.append(c);
251            }
252        }
253       
254        return sb.toString();
255    }
256}
Note: See TracBrowser for help on using the repository browser.