1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.fileupload;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.UnsupportedEncodingException;
22
/**
 * Utility class to decode/encode character set on HTTP Header fields based on RFC 2231. This implementation adheres to RFC 5987 in particular, which was
 * defined for HTTP headers.
 * <p>
 * RFC 5987 builds on RFC 2231, but has a narrower scope, such as a <a href="https://tools.ietf.org/html/rfc5987#section-3.2">mandatory charset definition</a>
 * and <a href="https://tools.ietf.org/html/rfc5987#section-4">no parameter continuation</a>.
 * </p>
 *
 * @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a>
 * @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a>
 */
final class RFC2231Utility {

    /**
     * Percent character '{@value}', which introduces a two-digit hexadecimal escape.
     */
    private static final char PERCENT = '%';

    /**
     * The hexadecimal digits, indexed by their numeric value.
     */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Bit mask (127) that limits a character to the 7-bit range, so that it is always a valid index into {@link #HEX_DECODE}.
     */
    private static final byte MASK = 0x7f;

    /**
     * The value 128, used as the size of the {@link #HEX_DECODE} lookup table.
     */
    private static final int MASK_128 = 0x80;

    /**
     * Lookup table that maps a 7-bit character to the numeric value of the hexadecimal digit it represents. Entries for characters that are not hexadecimal
     * digits keep the array default of {@code 0}.
     */
    private static final byte[] HEX_DECODE = new byte[MASK_128];

    // Fill the lookup table for upper and lower case hexadecimal digits.
    static {
        for (int i = 0; i < HEX_DIGITS.length; i++) {
            HEX_DECODE[HEX_DIGITS[i]] = (byte) i;
            HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i;
        }
    }

    /**
     * Decodes a string of text obtained from a HTTP header as per RFC 2231
     * <p>
     * <strong>Eg 1.</strong> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A} will be decoded to {@code This is ***fun***}
     * </p>
     * <p>
     * <strong>Eg 2.</strong> {@code iso-8859-1'en'%A3%20rate} will be decoded to {@code £ rate}
     * </p>
     * <p>
     * <strong>Eg 3.</strong> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates} will be decoded to {@code £ and € rates}
     * </p>
     * <p>
     * If the text does not contain both single quote delimiters (charset and language sections), it is returned unchanged.
     * </p>
     *
     * @param encodedText Text to be decoded has a format of {@code <charset>'<language>'<encoded_value>} and ASCII only
     * @return Decoded text based on charset encoding, or {@code encodedText} itself if a delimiter is missing
     * @throws UnsupportedEncodingException The requested character set wasn't found.
     */
    static String decodeText(final String encodedText) throws UnsupportedEncodingException {
        final int langDelimitStart = encodedText.indexOf('\'');
        if (langDelimitStart == -1) {
            // missing charset
            return encodedText;
        }
        final String mimeCharset = encodedText.substring(0, langDelimitStart);
        final int langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1);
        if (langDelimitEnd == -1) {
            // missing language
            return encodedText;
        }
        // The language tag between the two quotes is ignored; only the charset is needed for decoding.
        final byte[] bytes = fromHex(encodedText.substring(langDelimitEnd + 1));
        return new String(bytes, getJavaCharset(mimeCharset));
    }

    /**
     * Converts percent-encoded {@code text} to the bytes it represents.
     * <p>
     * Every {@code %XY} sequence becomes the single byte with the hexadecimal value {@code XY}; any other character is written as its low-order byte. A
     * character after {@code %} that is not a hexadecimal digit is treated as {@code 0}. Decoding stops at a {@code %} that is not followed by two more
     * characters, and the incomplete sequence is dropped.
     * </p>
     *
     * @param text ASCII text input
     * @return Byte array of the decoded characters
     */
    private static byte[] fromHex(final String text) {
        final int shift = 4;
        final ByteArrayOutputStream out = new ByteArrayOutputStream(text.length());
        for (int i = 0; i < text.length();) {
            final char c = text.charAt(i++);
            if (c == PERCENT) {
                if (i > text.length() - 2) {
                    break; // unterminated sequence
                }
                // Masking keeps the index inside the 128-entry lookup table.
                final byte b1 = HEX_DECODE[text.charAt(i++) & MASK];
                final byte b2 = HEX_DECODE[text.charAt(i++) & MASK];
                out.write(b1 << shift | b2);
            } else {
                out.write((byte) c);
            }
        }
        return out.toByteArray();
    }

    /**
     * Maps a MIME charset name to the charset name handed to Java. Currently the name is passed through unchanged.
     *
     * @param mimeCharset The charset name taken from the encoded text.
     * @return The charset name to use for decoding.
     */
    private static String getJavaCharset(final String mimeCharset) {
        // good enough for standard values
        return mimeCharset;
    }

    /**
     * Checks whether the parameter name ends with an Asterisk (*), which indicates that its value carries charset and language information to decode.
     *
     * @param paramName The parameter, which is being checked, may be {@code null}.
     * @return {@code true}, if encoded as per RFC 2231, {@code false} otherwise (including for {@code null} and the empty string)
     */
    static boolean hasEncodedValue(final String paramName) {
        // endsWith is false for the empty string, whereas comparing lastIndexOf('*') (-1) with length() - 1 (-1) wrongly reported a match.
        return paramName != null && paramName.endsWith("*");
    }

    /**
     * If {@code paramName} has Asterisk (*) at the end, it will be stripped off, else the passed value will be returned.
     *
     * @param paramName The parameter, which is being inspected, may be {@code null}.
     * @return stripped {@code paramName} of Asterisk (*), if RFC2231 encoded
     */
    static String stripDelimiter(final String paramName) {
        if (hasEncodedValue(paramName)) {
            // The asterisk is known to be the last character, so drop exactly that one.
            return paramName.substring(0, paramName.length() - 1);
        }
        return paramName;
    }

    /**
     * Private constructor so that no instances can be created. This class contains only static utility methods.
     */
    private RFC2231Utility() {
    }
}