View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.fileupload;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.UnsupportedEncodingException;
22  
/**
 * Utility class to decode/encode character set on HTTP Header fields based on RFC 2231. This implementation adheres to RFC 5987 in particular, which was
 * defined for HTTP headers.
 * <p>
 * RFC 5987 builds on RFC 2231, but has lesser scope like <a href="https://tools.ietf.org/html/rfc5987#section-3.2">mandatory charset definition</a> and
 * <a href="https://tools.ietf.org/html/rfc5987#section-4">no parameter continuation</a>
 * </p>
 *
 * @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a>
 * @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a>
 */
final class RFC2231Utility {

    /**
     * Percent character '{@value}', which introduces a two-digit hexadecimal escape.
     */
    private static final char PERCENT = '%';

    /**
     * The hexadecimal digits; the index of each digit is its numeric value.
     */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Bit mask (127, {@code 0x7f}) used to fold a character into the index range of {@link #HEX_DECODE}.
     */
    private static final byte MASK = 0x7f;

    /**
     * The value 128 ({@code 0x80}), used as the size of the {@link #HEX_DECODE} lookup table.
     */
    private static final int MASK_128 = 0x80;

    /**
     * Lookup table mapping a character code (0-127) to the value of the hexadecimal digit it represents. Entries for characters that are not hexadecimal
     * digits are left at {@code 0}.
     */
    private static final byte[] HEX_DECODE = new byte[MASK_128];

    // Populate the lookup table for both the upper case and the lower case spelling of every digit.
    static {
        for (int i = 0; i < HEX_DIGITS.length; i++) {
            HEX_DECODE[HEX_DIGITS[i]] = (byte) i;
            HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i;
        }
    }

    /**
     * Decodes a string of text obtained from a HTTP header as per RFC 2231
     * <p>
     * <strong>Eg 1.</strong> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A} will be decoded to {@code This is ***fun***}
     * </p>
     * <p>
     * <strong>Eg 2.</strong> {@code iso-8859-1'en'%A3%20rate} will be decoded to <code>&pound; rate</code>
     * </p>
     * <p>
     * <strong>Eg 3.</strong> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates} will be decoded to <code>&pound; and &euro; rates</code>
     * </p>
     * <p>
     * If the text does not contain both single quote delimiters, it is returned unchanged.
     * </p>
     *
     * @param encodedText Text to be decoded has a format of {@code <charset>'<language>'<encoded_value>} and ASCII only
     * @return Decoded text based on charset encoding
     * @throws UnsupportedEncodingException The requested character set wasn't found.
     */
    static String decodeText(final String encodedText) throws UnsupportedEncodingException {
        final int langDelimitStart = encodedText.indexOf('\'');
        if (langDelimitStart == -1) {
            // missing charset
            return encodedText;
        }
        final String mimeCharset = encodedText.substring(0, langDelimitStart);
        final int langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1);
        if (langDelimitEnd == -1) {
            // missing language
            return encodedText;
        }
        // The language tag between the two quotes is not needed for decoding and is skipped.
        final byte[] bytes = fromHex(encodedText.substring(langDelimitEnd + 1));
        return new String(bytes, getJavaCharset(mimeCharset));
    }

    /**
     * Converts percent-encoded {@code text} to the bytes it represents.
     * <p>
     * Every {@code %XX} sequence yields the single byte with hexadecimal value {@code XX}; every other character is narrowed to one byte. Decoding is
     * lenient: a character that is not a hexadecimal digit counts as {@code 0} inside an escape, and a {@code %} that is not followed by two more
     * characters ends the decoding, dropping the incomplete escape.
     * </p>
     *
     * @param text ASCII text input
     * @return Byte array of characters decoded from ASCII table
     */
    private static byte[] fromHex(final String text) {
        final int shift = 4;
        final ByteArrayOutputStream out = new ByteArrayOutputStream(text.length());
        for (int i = 0; i < text.length();) {
            final char c = text.charAt(i++);
            if (c == PERCENT) {
                if (i > text.length() - 2) {
                    break; // unterminated sequence
                }
                // Masking keeps the index inside the 128-entry lookup table.
                final byte b1 = HEX_DECODE[text.charAt(i++) & MASK];
                final byte b2 = HEX_DECODE[text.charAt(i++) & MASK];
                out.write(b1 << shift | b2);
            } else {
                out.write((byte) c);
            }
        }
        return out.toByteArray();
    }

    /**
     * Maps a MIME character set name to the name handed to the Java runtime. Currently the name is passed through unchanged.
     *
     * @param mimeCharset The character set name taken from the header value.
     * @return The character set name to use for decoding.
     */
    private static String getJavaCharset(final String mimeCharset) {
        // good enough for standard values
        return mimeCharset;
    }

    /**
     * Checks if Asterisk (*) at the end of parameter name to indicate, if it has charset and language information to decode the value.
     *
     * @param paramName The parameter, which is being checked. May be {@code null}.
     * @return {@code true}, if encoded as per RFC 2231, {@code false} otherwise (including for {@code null} and the empty string)
     */
    static boolean hasEncodedValue(final String paramName) {
        // endsWith is false for the empty string, whereas comparing lastIndexOf('*') with length() - 1 matches it (-1 == -1).
        return paramName != null && paramName.endsWith("*");
    }

    /**
     * If {@code paramName} has Asterisk (*) at the end, it will be stripped off, else the passed value will be returned.
     *
     * @param paramName The parameter, which is being inspected. May be {@code null}.
     * @return stripped {@code paramName} of Asterisk (*), if RFC2231 encoded
     */
    static String stripDelimiter(final String paramName) {
        if (hasEncodedValue(paramName)) {
            // Only the single trailing asterisk is removed.
            return paramName.substring(0, paramName.length() - 1);
        }
        return paramName;
    }

    /**
     * Private constructor so that no instances can be created. This class contains only static utility methods.
     */
    private RFC2231Utility() {
    }
}
159 }