source: contrib/MailArchiver/sources/vendor/mime4j/apache-mime4j-0.7-SNAPSHOT-20110327.010440-17/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java @ 6785

Revision 6785, 10.1 KB checked in by rafaelraymundo, 12 years ago (diff)

Ticket #2946 - Liberado código do MailArchiver. Documentação na subpasta DOCS.

Line 
1/****************************************************************
2 * Licensed to the Apache Software Foundation (ASF) under one   *
3 * or more contributor license agreements.  See the NOTICE file *
4 * distributed with this work for additional information        *
5 * regarding copyright ownership.  The ASF licenses this file   *
6 * to you under the Apache License, Version 2.0 (the            *
7 * "License"); you may not use this file except in compliance   *
8 * with the License.  You may obtain a copy of the License at   *
9 *                                                              *
10 *   http://www.apache.org/licenses/LICENSE-2.0                 *
11 *                                                              *
12 * Unless required by applicable law or agreed to in writing,   *
13 * software distributed under the License is distributed on an  *
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
15 * KIND, either express or implied.  See the License for the    *
16 * specific language governing permissions and limitations      *
17 * under the License.                                           *
18 ****************************************************************/
19
20package org.apache.james.mime4j.codec;
21
22import java.io.ByteArrayInputStream;
23import java.io.ByteArrayOutputStream;
24import java.io.IOException;
25import java.io.UnsupportedEncodingException;
26import java.util.regex.Matcher;
27import java.util.regex.Pattern;
28
29import org.apache.james.mime4j.util.CharsetUtil;
30
31/**
32 * Static methods for decoding strings, byte arrays and encoded words.
33 */
34public class DecoderUtil {
35
36    private static final Pattern PATTERN_ENCODED_WORD = Pattern.compile(
37            "(.*?)=\\?([^\\?]+?)\\?(\\w)\\?([^\\?]+?)\\?=", Pattern.DOTALL);
38
39    /**
40     * Decodes a string containing quoted-printable encoded data.
41     *
42     * @param s the string to decode.
43     * @return the decoded bytes.
44     */
45    private static byte[] decodeQuotedPrintable(String s, DecodeMonitor monitor) {
46        ByteArrayOutputStream baos = new ByteArrayOutputStream();
47       
48        try {
49            byte[] bytes = s.getBytes("US-ASCII");
50           
51            QuotedPrintableInputStream is = new QuotedPrintableInputStream(
52                                               new ByteArrayInputStream(bytes), monitor);
53           
54            int b = 0;
55            while ((b = is.read()) != -1) {
56                baos.write(b);
57            }
58        } catch (IOException e) {
59            // This should never happen!
60            throw new IllegalStateException(e);
61        }
62       
63        return baos.toByteArray();
64    }
65   
66    /**
67     * Decodes a string containing base64 encoded data.
68     *
69     * @param s the string to decode.
70     * @param monitor
71     * @return the decoded bytes.
72     */
73    private static byte[] decodeBase64(String s, DecodeMonitor monitor) {
74        ByteArrayOutputStream baos = new ByteArrayOutputStream();
75       
76        try {
77            byte[] bytes = s.getBytes("US-ASCII");
78           
79            Base64InputStream is = new Base64InputStream(
80                                        new ByteArrayInputStream(bytes), monitor);
81           
82            int b = 0;
83            while ((b = is.read()) != -1) {
84                baos.write(b);
85            }
86        } catch (IOException e) {
87            // This should never happen!
88            throw new IllegalStateException(e);
89        }
90       
91        return baos.toByteArray();
92    }
93   
94    /**
95     * Decodes an encoded text encoded with the 'B' encoding (described in
96     * RFC 2047) found in a header field body.
97     *
98     * @param encodedText the encoded text to decode.
99     * @param charset the Java charset to use.
100     * @param monitor
101     * @return the decoded string.
102     * @throws UnsupportedEncodingException if the given Java charset isn't
103     *         supported.
104     */
105    static String decodeB(String encodedText, String charset, DecodeMonitor monitor)
106            throws UnsupportedEncodingException {
107        byte[] decodedBytes = decodeBase64(encodedText, monitor);
108        return new String(decodedBytes, charset);
109    }
110   
111    /**
112     * Decodes an encoded text encoded with the 'Q' encoding (described in
113     * RFC 2047) found in a header field body.
114     *
115     * @param encodedText the encoded text to decode.
116     * @param charset the Java charset to use.
117     * @return the decoded string.
118     * @throws UnsupportedEncodingException if the given Java charset isn't
119     *         supported.
120     */
121    static String decodeQ(String encodedText, String charset, DecodeMonitor monitor)
122            throws UnsupportedEncodingException {
123        encodedText = replaceUnderscores(encodedText);
124       
125        byte[] decodedBytes = decodeQuotedPrintable(encodedText, monitor);
126        return new String(decodedBytes, charset);
127    }
128
129    static String decodeEncodedWords(String body)  {
130        return decodeEncodedWords(body, DecodeMonitor.SILENT);
131    }
132
133    /**
134     * Decodes a string containing encoded words as defined by RFC 2047. Encoded
135     * words have the form =?charset?enc?encoded-text?= where enc is either 'Q'
136     * or 'q' for quoted-printable and 'B' or 'b' for base64.
137     *
138     * @param body the string to decode
139     * @param monitor the DecodeMonitor to be used.
140     * @return the decoded string.
141     * @throws IllegalArgumentException only if the DecodeMonitor strategy throws it (Strict parsing)
142     */
143    public static String decodeEncodedWords(String body, DecodeMonitor monitor) throws IllegalArgumentException {
144        int tailIndex = 0;
145        boolean lastMatchValid = false;
146
147        StringBuilder sb = new StringBuilder();
148
149        for (Matcher matcher = PATTERN_ENCODED_WORD.matcher(body); matcher.find();) {
150            String separator = matcher.group(1);
151            String mimeCharset = matcher.group(2);
152            String encoding = matcher.group(3);
153            String encodedText = matcher.group(4);
154
155            String decoded = null;
156            decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor);
157            if (decoded == null) {
158                sb.append(matcher.group(0));
159            } else {
160                if (!lastMatchValid || !CharsetUtil.isWhitespace(separator)) {
161                    sb.append(separator);
162                }
163                sb.append(decoded);
164            }
165
166            tailIndex = matcher.end();
167            lastMatchValid = decoded != null;
168        }
169
170        if (tailIndex == 0) {
171            return body;
172        } else {
173            sb.append(body.substring(tailIndex));
174            return sb.toString();
175        }
176    }
177
178    // return null on error
179    private static String tryDecodeEncodedWord(final String mimeCharset,
180            final String encoding, final String encodedText, DecodeMonitor monitor) throws IllegalArgumentException {
181        String charset = CharsetUtil.toJavaCharset(mimeCharset);
182        if (charset == null) {
183            monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
184                    "Mime charser '", mimeCharset, "' doesn't have a corresponding Java charset");
185            return null;
186        } else if (!CharsetUtil.isDecodingSupported(charset)) {
187            monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
188                    "Current JDK doesn't support decoding of charset '", charset,
189                    "' - MIME charset '", mimeCharset, "' in encoded word");
190            return null;
191        }
192
193        if (encodedText.length() == 0) {
194            monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
195                    "Missing encoded text in encoded word");
196            return null;
197        }
198
199        try {
200            if (encoding.equalsIgnoreCase("Q")) {
201                return DecoderUtil.decodeQ(encodedText, charset, monitor);
202            } else if (encoding.equalsIgnoreCase("B")) {
203                return DecoderUtil.decodeB(encodedText, charset, monitor);
204            } else {
205                monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
206                        "Warning: Unknown encoding in encoded word");
207                return null;
208            }
209        } catch (UnsupportedEncodingException e) {
210            // should not happen because of isDecodingSupported check above
211            monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
212                    "Unsupported encoding (", e.getMessage(), ") in encoded word");
213            return null;
214        } catch (RuntimeException e) {
215            monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
216                    "Could not decode (", e.getMessage(), ") encoded word");
217            return null;
218        }
219    }
220
221    private static void monitor(DecodeMonitor monitor, String mimeCharset, String encoding,
222            String encodedText, String dropDesc, String... strings) throws IllegalArgumentException {
223        if (monitor.isListening()) {
224            String encodedWord = recombine(mimeCharset, encoding, encodedText);
225            StringBuilder text = new StringBuilder();
226            for (String str : strings) {
227                text.append(str);
228            }
229            text.append(" (");
230            text.append(encodedWord);
231            text.append(")");
232            String exceptionDesc = text.toString();
233            if (monitor.warn(exceptionDesc, dropDesc))
234                throw new IllegalArgumentException(text.toString());
235        }
236    }
237
238    private static String recombine(final String mimeCharset,
239            final String encoding, final String encodedText) {
240        return "=?" + mimeCharset + "?" + encoding + "?" + encodedText + "?=";
241    }
242
243    // Replace _ with =20
244    private static String replaceUnderscores(String str) {
245        // probably faster than String#replace(CharSequence, CharSequence)
246
247        StringBuilder sb = new StringBuilder(128);
248
249        for (int i = 0; i < str.length(); i++) {
250            char c = str.charAt(i);
251            if (c == '_') {
252                sb.append("=20");
253            } else {
254                sb.append(c);
255            }
256        }
257       
258        return sb.toString();
259    }
260}
Note: See TracBrowser for help on using the repository browser.