1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.fileupload.util.mime;
18
19 import java.io.ByteArrayOutputStream;
20 import java.io.UnsupportedEncodingException;
21 import java.util.Base64;
22 import java.util.HashMap;
23 import java.util.Locale;
24 import java.util.Map;
25
26
27
28
29
30
31 public final class MimeUtility {
32
33
34
35
36 private static final String US_ASCII_CHARSET = "US-ASCII";
37
38
39
40
41 private static final String BASE64_ENCODING_MARKER = "B";
42
43
44
45
46 private static final String QUOTEDPRINTABLE_ENCODING_MARKER = "Q";
47
48
49
50
51 private static final String ENCODED_TOKEN_MARKER = "=?";
52
53
54
55
56 private static final String ENCODED_TOKEN_FINISHER = "?=";
57
58
59
60
61 private static final String LINEAR_WHITESPACE = " \t\r\n";
62
63
64
65
66 private static final Map<String, String> MIME2JAVA = new HashMap<>();
67
68 static {
69 MIME2JAVA.put("iso-2022-cn", "ISO2022CN");
70 MIME2JAVA.put("iso-2022-kr", "ISO2022KR");
71 MIME2JAVA.put("utf-8", "UTF8");
72 MIME2JAVA.put("utf8", "UTF8");
73 MIME2JAVA.put("ja_jp.iso2022-7", "ISO2022JP");
74 MIME2JAVA.put("ja_jp.eucjp", "EUCJIS");
75 MIME2JAVA.put("euc-kr", "KSC5601");
76 MIME2JAVA.put("euckr", "KSC5601");
77 MIME2JAVA.put("us-ascii", "ISO-8859-1");
78 MIME2JAVA.put("x-us-ascii", "ISO-8859-1");
79 }
80
81
82
83
84
85
86
87
88
89
90
91 public static String decodeText(final String text) throws UnsupportedEncodingException {
92
93
94 if (!text.contains(ENCODED_TOKEN_MARKER)) {
95 return text;
96 }
97
98 int offset = 0;
99 final int endOffset = text.length();
100
101 int startWhiteSpace = -1;
102 int endWhiteSpace = -1;
103
104 final StringBuilder decodedText = new StringBuilder(text.length());
105
106 boolean previousTokenEncoded = false;
107
108 while (offset < endOffset) {
109 char ch = text.charAt(offset);
110
111
112 if (LINEAR_WHITESPACE.indexOf(ch) != -1) {
113 startWhiteSpace = offset;
114 while (offset < endOffset) {
115
116 ch = text.charAt(offset);
117 if (LINEAR_WHITESPACE.indexOf(ch) == -1) {
118
119
120 endWhiteSpace = offset;
121 break;
122 }
123 offset++;
124 }
125 } else {
126
127 final int wordStart = offset;
128
129 while (offset < endOffset) {
130
131 ch = text.charAt(offset);
132 if (LINEAR_WHITESPACE.indexOf(ch) != -1) {
133 break;
134 }
135 offset++;
136
137
138 }
139
140 final String word = text.substring(wordStart, offset);
141
142 if (word.startsWith(ENCODED_TOKEN_MARKER)) {
143 try {
144
145 final String decodedWord = decodeWord(word);
146
147
148 if (!previousTokenEncoded && startWhiteSpace != -1) {
149 decodedText.append(text, startWhiteSpace, endWhiteSpace);
150 startWhiteSpace = -1;
151 }
152
153 previousTokenEncoded = true;
154
155 decodedText.append(decodedWord);
156
157
158 continue;
159
160 } catch (final ParseException e) {
161
162 }
163 }
164
165
166 if (startWhiteSpace != -1) {
167 decodedText.append(text, startWhiteSpace, endWhiteSpace);
168 startWhiteSpace = -1;
169 }
170
171 previousTokenEncoded = false;
172 decodedText.append(word);
173 }
174 }
175
176 return decodedText.toString();
177 }
178
179
180
181
182
183
184
185
186
187
188
189
190 private static String decodeWord(final String word) throws ParseException, UnsupportedEncodingException {
191
192
193
194 if (!word.startsWith(ENCODED_TOKEN_MARKER)) {
195 throw new ParseException("Invalid RFC 2047 encoded-word: " + word);
196 }
197
198 final int charsetPos = word.indexOf('?', 2);
199 if (charsetPos == -1) {
200 throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word);
201 }
202
203
204 final String charset = word.substring(2, charsetPos).toLowerCase(Locale.ROOT);
205
206
207 final int encodingPos = word.indexOf('?', charsetPos + 1);
208 if (encodingPos == -1) {
209 throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word);
210 }
211
212 final String encoding = word.substring(charsetPos + 1, encodingPos);
213
214
215 final int encodedTextPos = word.indexOf(ENCODED_TOKEN_FINISHER, encodingPos + 1);
216 if (encodedTextPos == -1) {
217 throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word);
218 }
219
220 final String encodedText = word.substring(encodingPos + 1, encodedTextPos);
221
222
223 if (encodedText.isEmpty()) {
224 return "";
225 }
226
227 try {
228
229 final ByteArrayOutputStream out = new ByteArrayOutputStream(encodedText.length());
230
231 final byte[] encodedData = encodedText.getBytes(US_ASCII_CHARSET);
232
233
234 if (encoding.equals(BASE64_ENCODING_MARKER)) {
235 out.write(Base64.getDecoder().decode(encodedData));
236 } else if (encoding.equals(QUOTEDPRINTABLE_ENCODING_MARKER)) {
237 QuotedPrintableDecoder.decode(encodedData, out);
238 } else {
239 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
240 }
241
242 final byte[] decodedData = out.toByteArray();
243 return new String(decodedData, javaCharset(charset));
244 } catch (final Exception e) {
245 throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
246 }
247 }
248
249
250
251
252
253
254
255
256 private static String javaCharset(final String charset) {
257
258 if (charset == null) {
259 return null;
260 }
261
262 final String mappedCharset = MIME2JAVA.get(charset.toLowerCase(Locale.ROOT));
263
264
265 if (mappedCharset == null) {
266 return charset;
267 }
268 return mappedCharset;
269 }
270
271
272
273
274 private MimeUtility() {
275
276 }
277
278 }