/****************************************************************
 * Licensed to the Apache Software Foundation (ASF) under one   *
 * or more contributor license agreements. See the NOTICE file  *
 * distributed with this work for additional information        *
 * regarding copyright ownership. The ASF licenses this file    *
 * to you under the Apache License, Version 2.0 (the            *
 * "License"); you may not use this file except in compliance   *
 * with the License. You may obtain a copy of the License at    *
 *                                                              *
 *   http://www.apache.org/licenses/LICENSE-2.0                 *
 *                                                              *
 * Unless required by applicable law or agreed to in writing,   *
 * software distributed under the License is distributed on an  *
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY       *
 * KIND, either express or implied. See the License for the     *
 * specific language governing permissions and limitations      *
 * under the License.                                           *
 ****************************************************************/
19 | |
---|
20 | package org.apache.james.mime4j.stream; |
---|
21 | |
---|
22 | import java.util.ArrayList; |
---|
23 | import java.util.List; |
---|
24 | |
---|
25 | import org.apache.james.mime4j.MimeException; |
---|
26 | import org.apache.james.mime4j.util.ByteSequence; |
---|
27 | import org.apache.james.mime4j.util.CharsetUtil; |
---|
28 | import org.apache.james.mime4j.util.ContentUtil; |
---|
29 | |
---|
30 | /** |
---|
31 | * The basic immutable MIME field. |
---|
32 | */ |
---|
33 | public class RawFieldParser { |
---|
34 | |
---|
35 | static final int[] COLON = { ':' }; |
---|
36 | static final int[] EQUAL_OR_SEMICOLON = { '=', ';' }; |
---|
37 | static final int[] SEMICOLON = { ';' }; |
---|
38 | |
---|
39 | public static final RawFieldParser DEFAULT = new RawFieldParser(); |
---|
40 | |
---|
41 | public RawField parseField(final ByteSequence raw) throws MimeException { |
---|
42 | if (raw == null) { |
---|
43 | return null; |
---|
44 | } |
---|
45 | ParserCursor cursor = new ParserCursor(0, raw.length()); |
---|
46 | String name = parseToken(raw, cursor, COLON); |
---|
47 | if (cursor.atEnd()) { |
---|
48 | throw new MimeException("Invalid MIME field: no name/value separator found: " + |
---|
49 | raw.toString()); |
---|
50 | } |
---|
51 | return new RawField(raw, cursor.getPos(), name, null); |
---|
52 | } |
---|
53 | |
---|
54 | public RawBody parseRawBody(final RawField field) { |
---|
55 | ByteSequence buf = field.getRaw(); |
---|
56 | int pos = field.getDelimiterIdx() + 1; |
---|
57 | if (buf == null) { |
---|
58 | String body = field.getBody(); |
---|
59 | if (body == null) { |
---|
60 | return new RawBody("", null); |
---|
61 | } |
---|
62 | buf = ContentUtil.encode(body); |
---|
63 | pos = 0; |
---|
64 | } |
---|
65 | ParserCursor cursor = new ParserCursor(pos, buf.length()); |
---|
66 | return parseRawBody(buf, cursor); |
---|
67 | } |
---|
68 | |
---|
69 | RawBody parseRawBody(final ByteSequence buf, final ParserCursor cursor) { |
---|
70 | String value = parseToken(buf, cursor, SEMICOLON); |
---|
71 | if (cursor.atEnd()) { |
---|
72 | return new RawBody(value, new ArrayList<NameValuePair>()); |
---|
73 | } |
---|
74 | cursor.updatePos(cursor.getPos() + 1); |
---|
75 | List<NameValuePair> params = parseParameters(buf, cursor); |
---|
76 | return new RawBody(value, params); |
---|
77 | } |
---|
78 | |
---|
79 | List<NameValuePair> parseParameters(final ByteSequence buf, final ParserCursor cursor) { |
---|
80 | List<NameValuePair> params = new ArrayList<NameValuePair>(); |
---|
81 | skipWhiteSpace(buf, cursor); |
---|
82 | while (!cursor.atEnd()) { |
---|
83 | NameValuePair param = parseParameter(buf, cursor); |
---|
84 | params.add(param); |
---|
85 | } |
---|
86 | return params; |
---|
87 | } |
---|
88 | |
---|
89 | NameValuePair parseParameter(final ByteSequence buf, final ParserCursor cursor) { |
---|
90 | String name = parseToken(buf, cursor, EQUAL_OR_SEMICOLON); |
---|
91 | if (cursor.atEnd()) { |
---|
92 | return new NameValuePair(name, null); |
---|
93 | } |
---|
94 | int delim = buf.byteAt(cursor.getPos()); |
---|
95 | cursor.updatePos(cursor.getPos() + 1); |
---|
96 | if (delim == ';') { |
---|
97 | return new NameValuePair(name, null); |
---|
98 | } |
---|
99 | String value = parseValue(buf, cursor, SEMICOLON); |
---|
100 | if (!cursor.atEnd()) { |
---|
101 | cursor.updatePos(cursor.getPos() + 1); |
---|
102 | } |
---|
103 | return new NameValuePair(name, value); |
---|
104 | } |
---|
105 | |
---|
106 | static boolean isOneOf(final int ch, final int[] chs) { |
---|
107 | if (chs != null) { |
---|
108 | for (int i = 0; i < chs.length; i++) { |
---|
109 | if (ch == chs[i]) { |
---|
110 | return true; |
---|
111 | } |
---|
112 | } |
---|
113 | } |
---|
114 | return false; |
---|
115 | } |
---|
116 | |
---|
117 | static String parseToken(final ByteSequence buf, final ParserCursor cursor, final int[] delimiters) { |
---|
118 | StringBuilder dst = new StringBuilder(); |
---|
119 | boolean whitespace = false; |
---|
120 | while (!cursor.atEnd()) { |
---|
121 | char current = (char) (buf.byteAt(cursor.getPos()) & 0xff); |
---|
122 | if (isOneOf(current, delimiters)) { |
---|
123 | break; |
---|
124 | } else if (CharsetUtil.isWhitespace(current)) { |
---|
125 | skipWhiteSpace(buf, cursor); |
---|
126 | whitespace = true; |
---|
127 | } else if (current == '(') { |
---|
128 | skipComment(buf, cursor); |
---|
129 | } else { |
---|
130 | if (dst.length() > 0 && whitespace) { |
---|
131 | dst.append(' '); |
---|
132 | } |
---|
133 | copyContent(buf, cursor, delimiters, dst); |
---|
134 | whitespace = false; |
---|
135 | } |
---|
136 | } |
---|
137 | return dst.toString(); |
---|
138 | } |
---|
139 | |
---|
140 | static String parseValue(final ByteSequence buf, final ParserCursor cursor, final int[] delimiters) { |
---|
141 | StringBuilder dst = new StringBuilder(); |
---|
142 | boolean whitespace = false; |
---|
143 | while (!cursor.atEnd()) { |
---|
144 | char current = (char) (buf.byteAt(cursor.getPos()) & 0xff); |
---|
145 | if (isOneOf(current, delimiters)) { |
---|
146 | break; |
---|
147 | } else if (CharsetUtil.isWhitespace(current)) { |
---|
148 | skipWhiteSpace(buf, cursor); |
---|
149 | whitespace = true; |
---|
150 | } else if (current == '(') { |
---|
151 | skipComment(buf, cursor); |
---|
152 | } else if (current == '\"') { |
---|
153 | if (dst.length() > 0 && whitespace) { |
---|
154 | dst.append(' '); |
---|
155 | } |
---|
156 | copyQuotedContent(buf, cursor, dst); |
---|
157 | whitespace = false; |
---|
158 | } else { |
---|
159 | if (dst.length() > 0 && whitespace) { |
---|
160 | dst.append(' '); |
---|
161 | } |
---|
162 | copyContent(buf, cursor, delimiters, dst); |
---|
163 | whitespace = false; |
---|
164 | } |
---|
165 | } |
---|
166 | return dst.toString(); |
---|
167 | } |
---|
168 | |
---|
169 | static void skipWhiteSpace(final ByteSequence buf, final ParserCursor cursor) { |
---|
170 | int pos = cursor.getPos(); |
---|
171 | int indexFrom = cursor.getPos(); |
---|
172 | int indexTo = cursor.getUpperBound(); |
---|
173 | for (int i = indexFrom; i < indexTo; i++) { |
---|
174 | char current = (char) (buf.byteAt(i) & 0xff); |
---|
175 | if (!CharsetUtil.isWhitespace(current)) { |
---|
176 | break; |
---|
177 | } else { |
---|
178 | pos++; |
---|
179 | } |
---|
180 | } |
---|
181 | cursor.updatePos(pos); |
---|
182 | } |
---|
183 | |
---|
184 | static void skipComment(final ByteSequence buf, final ParserCursor cursor) { |
---|
185 | if (cursor.atEnd()) { |
---|
186 | return; |
---|
187 | } |
---|
188 | int pos = cursor.getPos(); |
---|
189 | int indexFrom = cursor.getPos(); |
---|
190 | int indexTo = cursor.getUpperBound(); |
---|
191 | char current = (char) (buf.byteAt(pos) & 0xff); |
---|
192 | if (current != '(') { |
---|
193 | return; |
---|
194 | } |
---|
195 | pos++; |
---|
196 | indexFrom++; |
---|
197 | |
---|
198 | int level = 1; |
---|
199 | boolean escaped = false; |
---|
200 | for (int i = indexFrom; i < indexTo; i++, pos++) { |
---|
201 | current = (char) (buf.byteAt(i) & 0xff); |
---|
202 | if (escaped) { |
---|
203 | escaped = false; |
---|
204 | } else { |
---|
205 | if (current == '\\') { |
---|
206 | escaped = true; |
---|
207 | } else if (current == '(') { |
---|
208 | level++; |
---|
209 | } else if (current == ')') { |
---|
210 | level--; |
---|
211 | } |
---|
212 | } |
---|
213 | if (level <= 0) { |
---|
214 | pos++; |
---|
215 | break; |
---|
216 | } |
---|
217 | } |
---|
218 | cursor.updatePos(pos); |
---|
219 | } |
---|
220 | |
---|
221 | static void copyContent(final ByteSequence buf, final ParserCursor cursor, final int[] delimiters, |
---|
222 | final StringBuilder dst) { |
---|
223 | int pos = cursor.getPos(); |
---|
224 | int indexFrom = cursor.getPos(); |
---|
225 | int indexTo = cursor.getUpperBound(); |
---|
226 | for (int i = indexFrom; i < indexTo; i++) { |
---|
227 | char current = (char) (buf.byteAt(i) & 0xff); |
---|
228 | if (isOneOf(current, delimiters) || CharsetUtil.isWhitespace(current) || current == '(') { |
---|
229 | break; |
---|
230 | } else { |
---|
231 | pos++; |
---|
232 | dst.append(current); |
---|
233 | } |
---|
234 | } |
---|
235 | cursor.updatePos(pos); |
---|
236 | } |
---|
237 | |
---|
238 | static void copyQuotedContent(final ByteSequence buf, final ParserCursor cursor, |
---|
239 | final StringBuilder dst) { |
---|
240 | if (cursor.atEnd()) { |
---|
241 | return; |
---|
242 | } |
---|
243 | int pos = cursor.getPos(); |
---|
244 | int indexFrom = cursor.getPos(); |
---|
245 | int indexTo = cursor.getUpperBound(); |
---|
246 | char current = (char) (buf.byteAt(pos) & 0xff); |
---|
247 | if (current != '\"') { |
---|
248 | return; |
---|
249 | } |
---|
250 | pos++; |
---|
251 | indexFrom++; |
---|
252 | boolean escaped = false; |
---|
253 | for (int i = indexFrom; i < indexTo; i++, pos++) { |
---|
254 | current = (char) (buf.byteAt(i) & 0xff); |
---|
255 | if (escaped) { |
---|
256 | if (current != '\"' && current != '\\') { |
---|
257 | dst.append('\\'); |
---|
258 | } |
---|
259 | dst.append(current); |
---|
260 | escaped = false; |
---|
261 | } else { |
---|
262 | if (current == '\"') { |
---|
263 | pos++; |
---|
264 | break; |
---|
265 | } |
---|
266 | if (current == '\\') { |
---|
267 | escaped = true; |
---|
268 | } else if (current != '\r' && current != '\n') { |
---|
269 | dst.append(current); |
---|
270 | } |
---|
271 | } |
---|
272 | } |
---|
273 | cursor.updatePos(pos); |
---|
274 | } |
---|
275 | |
---|
276 | } |
---|