001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload.disk;
018
019import java.io.ByteArrayInputStream;
020import java.io.File;
021import java.io.FileOutputStream;
022import java.io.IOException;
023import java.io.InputStream;
024import java.io.OutputStream;
025import java.io.UncheckedIOException;
026import java.io.UnsupportedEncodingException;
027import java.nio.file.Files;
028import java.util.Map;
029import java.util.UUID;
030import java.util.concurrent.atomic.AtomicInteger;
031
032import org.apache.commons.fileupload.FileItem;
033import org.apache.commons.fileupload.FileItemHeaders;
034import org.apache.commons.fileupload.FileUploadException;
035import org.apache.commons.fileupload.ParameterParser;
036import org.apache.commons.fileupload.util.Streams;
037import org.apache.commons.io.FileUtils;
038import org.apache.commons.io.function.Uncheck;
039import org.apache.commons.io.output.DeferredFileOutputStream;
040
041/**
042 * The default implementation of the {@link org.apache.commons.fileupload.FileItem FileItem} interface.
043 *
044 * <p>
045 * After retrieving an instance of this class from a {@link DiskFileItemFactory} instance (see {@link org.apache.commons.fileupload.servlet.ServletFileUpload
046 * #parseRequest(javax.servlet.http.HttpServletRequest)}), you may either request all contents of file at once using {@link #get()} or request an
047 * {@link java.io.InputStream InputStream} with {@link #getInputStream()} and process the file without attempting to load it into memory, which may come handy
048 * with large files.
049 * </p>
050 * <p>
051 * Temporary files, which are created for file items, should be deleted later on. The best way to do this is using a
052 * {@link org.apache.commons.io.FileCleaningTracker}, which you can set on the {@link DiskFileItemFactory}. However, if you do use such a tracker, then you must
053 * consider the following: Temporary files are automatically deleted as soon as they are no longer needed. (More precisely, when the corresponding instance of
054 * {@link java.io.File} is garbage collected.) This is done by the so-called reaper thread, which is started and stopped automatically by the
055 * {@link org.apache.commons.io.FileCleaningTracker} when there are files to be tracked. It might make sense to terminate that thread, for example, if your web
056 * application ends. See the section on "Resource cleanup" in the users guide of commons-fileupload.
057 * </p>
058 *
059 * @since FileUpload 1.1
060 */
061public class DiskFileItem implements FileItem {
062
063    /**
064     * Counter used in unique identifier generation.
065     */
066    private static final AtomicInteger COUNTER = new AtomicInteger(0);
067
068    /**
069     * Default content charset to be used when no explicit charset
070     * parameter is provided by the sender. Media subtypes of the
071     * "text" type are defined to have a default charset value of
072     * "ISO-8859-1" when received via HTTP.
073     */
074    public static final String DEFAULT_CHARSET = "ISO-8859-1";
075
076    /**
077     * UID used in unique file name generation.
078     */
079    private static final String UID = UUID.randomUUID().toString().replace('-', '_');
080
081    /**
082     * Returns an identifier that is unique within the class loader used to
083     * load this class, but does not have random-like appearance.
084     *
085     * @return A String with the non-random looking instance identifier.
086     */
087    private static String getUniqueId() {
088        final int limit = 100000000;
089        final int current = COUNTER.getAndIncrement();
090        String id = Integer.toString(current);
091
092        // If you manage to get more than 100 million of ids, you'll
093        // start getting ids longer than 8 characters.
094        if (current < limit) {
095            id = ("00000000" + id).substring(id.length());
096        }
097        return id;
098    }
099
100    /**
101     * Cached contents of the file.
102     */
103    private byte[] cachedContent;
104
105    /**
106     * The content type passed by the browser, or {@code null} if not defined.
107     */
108    private final String contentType;
109
110    /**
111     * Default content charset to be used when no explicit charset parameter is provided by the sender.
112     */
113    private String defaultCharset = DEFAULT_CHARSET;
114
115    /**
116     * Output stream for this item.
117     */
118    private transient DeferredFileOutputStream dfos;
119
120    /**
121     * The name of the form field as provided by the browser.
122     */
123    private String fieldName;
124
125    /**
126     * The original file name in the user's file system.
127     */
128    private final String fileName;
129
130    /**
131     * The file items headers.
132     */
133    private FileItemHeaders headers;
134
135    /**
136     * Whether or not this item is a simple form field.
137     */
138    private boolean formField;
139
140    /**
141     * The directory in which uploaded files will be stored, if stored on disk.
142     */
143    private final File repository;
144
145    /**
146     * The size of the item, in bytes. This is used to cache the size when a
147     * file item is moved from its original location.
148     */
149    private long size = -1;
150
151    /**
152     * The threshold above which uploads will be stored on disk.
153     */
154    private final int sizeThreshold;
155
156    /**
157     * The temporary file to use.
158     */
159    private transient File tempFile;
160
161    /**
162     * Constructs a new {@code DiskFileItem} instance.
163     *
164     * @param fieldName     The name of the form field.
165     * @param contentType   The content type passed by the browser or {@code null} if not specified.
166     * @param isFormField   Whether or not this item is a plain form field, as opposed to a file upload.
167     * @param fileName      The original file name in the user's file system, or {@code null} if not specified.
168     * @param sizeThreshold The threshold, in bytes, below which items will be retained in memory and above which they
169     *                      will be stored as a file.
170     * @param repository    The data repository, which is the directory in which files will be created, should the item
171     *                      size exceed the threshold.
172     */
173    public DiskFileItem(final String fieldName, final String contentType, final boolean isFormField,
174            final String fileName, final int sizeThreshold, final File repository) {
175        this.fieldName = fieldName;
176        this.contentType = contentType;
177        this.formField = isFormField;
178        this.fileName = fileName;
179        this.sizeThreshold = sizeThreshold;
180        this.repository = repository;
181    }
182
183    /**
184     * Clears the cache.
185     */
186    private void clear() {
187        cachedContent = null; // NOPMD
188    }
189
190    /**
191     * Deletes the underlying storage for a file item, including deleting any
192     * associated temporary disk file. Although this storage will be deleted
193     * automatically when the {@code FileItem} instance is garbage
194     * collected, this method can be used to ensure that this is done at an
195     * earlier time, thus preserving system resources.
196     */
197    @Override
198    public void delete() {
199        clear();
200        final File outputFile = getStoreLocation();
201        if (outputFile != null && !isInMemory() && outputFile.exists()) {
202            outputFile.delete();
203        }
204    }
205
206    /**
207     * Removes the file contents from the temporary storage.
208     *
209     * @throws Throwable Thrown by {@link Object#finalize()}.
210     */
211    @Override
212    protected void finalize() throws Throwable {
213        if (dfos == null || dfos.isInMemory()) {
214            return;
215        }
216        final File outputFile = dfos.getFile();
217        if (outputFile != null && outputFile.exists()) {
218            outputFile.delete();
219        }
220        super.finalize();
221    }
222
223    /**
224     * Gets the contents of the file as an array of bytes. If the contents of the file were not yet cached in memory, they will be loaded from the disk storage
225     * and cached.
226     *
227     * @return The contents of the file as an array of bytes or {@code null} if the data cannot be read.
228     * @throws UncheckedIOException if an I/O error occurs.
229     * @throws OutOfMemoryError     if an array of the required size cannot be allocated, for example the file is larger that {@code 2GB}.
230     */
231    @Override
232    public byte[] get() {
233        if (isInMemory()) {
234            if (cachedContent == null && dfos != null) {
235                cachedContent = dfos.getData();
236            }
237            return cachedContent != null ? cachedContent.clone() : new byte[0];
238        }
239        return Uncheck.get(() -> Files.readAllBytes(dfos.getFile().toPath()));
240    }
241
242    /**
243     * Gets the content charset passed by the agent or {@code null} if not defined.
244     *
245     * @return The content charset passed by the agent or {@code null} if not defined.
246     */
247    public String getCharSet() {
248        final ParameterParser parser = new ParameterParser();
249        parser.setLowerCaseNames(true);
250        // Parameter parser can handle null input
251        final Map<String, String> params = parser.parse(getContentType(), ';');
252        return params.get("charset");
253    }
254
255    /**
256     * Gets the content type passed by the agent or {@code null} if not defined.
257     *
258     * @return The content type passed by the agent or {@code null} if not defined.
259     */
260    @Override
261    public String getContentType() {
262        return contentType;
263    }
264
265    /**
266     * Gets the default charset for use when no explicit charset parameter is provided by the sender.
267     *
268     * @return the default charset
269     */
270    public String getDefaultCharset() {
271        return defaultCharset;
272    }
273
274    /**
275     * Gets the name of the field in the multipart form corresponding to this file item.
276     *
277     * @return The name of the form field.
278     * @see #setFieldName(String)
279     */
280    @Override
281    public String getFieldName() {
282        return fieldName;
283    }
284
285    /**
286     * Gets the file item headers.
287     *
288     * @return The file items headers.
289     */
290    @Override
291    public FileItemHeaders getHeaders() {
292        return headers;
293    }
294
295    /**
296     * Gets an {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
297     *
298     * @return An {@link java.io.InputStream InputStream} that can be used to retrieve the contents of the file.
299     *
300     * @throws IOException if an error occurs.
301     */
302    @Override
303    public InputStream getInputStream() throws IOException {
304        if (!isInMemory()) {
305            return Files.newInputStream(dfos.getPath());
306        }
307        if (cachedContent == null) {
308            cachedContent = dfos.getData();
309        }
310        return new ByteArrayInputStream(cachedContent);
311    }
312
313    /**
314     * Gets the original file name in the client's file system.
315     *
316     * @return The original file name in the client's file system.
317     * @throws org.apache.commons.fileupload.InvalidFileNameException The file name contains a NUL character,
318     *   which might be an indicator of a security attack. If you intend to
319     *   use the file name anyways, catch the exception and use
320     *   {@link org.apache.commons.fileupload.InvalidFileNameException#getName()}.
321     */
322    @Override
323    public String getName() {
324        return Streams.checkFileName(fileName);
325    }
326
327    /**
328     * Gets an {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file.
329     *
330     * @return An {@link java.io.OutputStream OutputStream} that can be used for storing the contents of the file.
331     *
332     * @throws IOException if an error occurs (never happens).
333     */
334    @Override
335    public OutputStream getOutputStream() throws IOException {
336        if (dfos == null) {
337            final File outputFile = getTempFile();
338            // @formatter:off
339            dfos = DeferredFileOutputStream.builder()
340                    .setThreshold(sizeThreshold)
341                    .setOutputFile(outputFile)
342                    .get();
343            // @formatter:on
344        }
345        return dfos;
346    }
347
348    /**
349     * Gets the size of the file.
350     *
351     * @return The size of the file, in bytes.
352     */
353    @Override
354    public long getSize() {
355        if (size >= 0) {
356            return size;
357        }
358        if (cachedContent != null) {
359            return cachedContent.length;
360        }
361        if (dfos.isInMemory()) {
362            return dfos.getData().length;
363        }
364        return dfos.getFile().length();
365    }
366
367    /**
368     * Gets the {@link java.io.File} object for the {@code FileItem}'s
369     * data's temporary location on the disk. Note that for
370     * {@code FileItem}s that have their data stored in memory,
371     * this method will return {@code null}. When handling large
372     * files, you can use {@link java.io.File#renameTo(java.io.File)} to
373     * move the file to new location without copying the data, if the
374     * source and destination locations reside within the same logical
375     * volume.
376     *
377     * @return The data file, or {@code null} if the data is stored in memory.
378     */
379    public File getStoreLocation() {
380        if (dfos == null) {
381            return null;
382        }
383        if (isInMemory()) {
384            return null;
385        }
386        return dfos.getFile();
387    }
388
389    /**
390     * Gets the contents of the file as a String, using the default character encoding. This method uses
391     * {@link #get()} to retrieve the contents of the file.
392     * <p>
393     * <strong>TODO</strong> Consider making this method throw UnsupportedEncodingException.
394     *
395     * @return The contents of the file, as a string.
396     */
397    @Override
398    public String getString() {
399        final byte[] rawData = get();
400        String charset = getCharSet();
401        if (charset == null) {
402            charset = defaultCharset;
403        }
404        try {
405            return new String(rawData, charset);
406        } catch (final UnsupportedEncodingException e) {
407            return "";
408        }
409    }
410
411    /**
412     * Gets the contents of the file as a String, using the specified encoding. This method uses {@link #get()} to
413     * retrieve the contents of the file.
414     *
415     * @param charset The charset to use.
416     * @return The contents of the file, as a string.
417     * @throws UnsupportedEncodingException if the requested character encoding is not available.
418     */
419    @Override
420    public String getString(final String charset) throws UnsupportedEncodingException {
421        return new String(get(), charset);
422    }
423
424    /**
425     * Creates and returns a {@link java.io.File File} representing a uniquely named temporary file in the configured
426     * repository path. The lifetime of the file is tied to the lifetime of the {@code FileItem} instance; the file will
427     * be deleted when the instance is garbage collected.
428     * <p>
429     * <b>Note: Subclasses that override this method must ensure that they return the same File each time.</b>
430     *
431     * @return The {@link java.io.File File} to be used for temporary storage.
432     */
433    protected File getTempFile() {
434        if (tempFile == null) {
435            File tempDir = repository;
436            if (tempDir == null) {
437                tempDir = new File(System.getProperty("java.io.tmpdir"));
438            }
439
440            final String tempFileName = String.format("upload_%s_%s.tmp", UID, getUniqueId());
441
442            tempFile = new File(tempDir, tempFileName);
443        }
444        return tempFile;
445    }
446
447    /**
448     * Tests whether or not a {@code FileItem} instance represents a simple form field.
449     *
450     * @return {@code true} if the instance represents a simple form field; {@code false} if it represents an uploaded
451     *             file.
452     *
453     * @see #setFormField(boolean)
454     */
455    @Override
456    public boolean isFormField() {
457        return formField;
458    }
459
460    /**
461     * Provides a hint as to whether or not the file contents will be read from memory.
462     *
463     * @return {@code true} if the file contents will be read from memory; {@code false} otherwise.
464     */
465    @Override
466    public boolean isInMemory() {
467        if (cachedContent != null) {
468            return true;
469        }
470        return dfos.isInMemory();
471    }
472
473    /**
474     * Sets the default charset for use when no explicit charset parameter is provided by the sender.
475     *
476     * @param charset the default charset
477     */
478    public void setDefaultCharset(final String charset) {
479        defaultCharset = charset;
480    }
481
482    /**
483     * Sets the field name used to reference this file item.
484     *
485     * @param fieldName The name of the form field.
486     * @see #getFieldName()
487     */
488    @Override
489    public void setFieldName(final String fieldName) {
490        this.fieldName = fieldName;
491    }
492
493    /**
494     * Sets whether or not a {@code FileItem} instance represents a simple form field.
495     *
496     * @param formField {@code true} if the instance represents a simple form
497     *              field; {@code false} if it represents an uploaded file.
498     *
499     * @see #isFormField()
500     */
501    @Override
502    public void setFormField(final boolean formField) {
503        this.formField = formField;
504    }
505
506    /**
507     * Sets the file item headers.
508     *
509     * @param headers The file items headers.
510     */
511    @Override
512    public void setHeaders(final FileItemHeaders headers) {
513        this.headers = headers;
514    }
515
516    /**
517     * Returns a string representation of this object.
518     *
519     * @return a string representation of this object.
520     */
521    @Override
522    public String toString() {
523        return String.format("name=%s, StoreLocation=%s, size=%s bytes, isFormField=%s, FieldName=%s",
524                getName(), getStoreLocation(), Long.valueOf(getSize()), Boolean.valueOf(isFormField()), getFieldName());
525    }
526
527    /**
528     * A convenience method to write an uploaded item to disk. The client code
529     * is not concerned with whether or not the item is stored in memory, or on
530     * disk in a temporary location. They just want to write the uploaded item
531     * to a file.
532     * <p>
533     * This implementation first attempts to rename the uploaded item to the
534     * specified destination file, if the item was originally written to disk.
535     * Otherwise, the data will be copied to the specified file.
536     * </p>
537     * <p>
538     * This method is only guaranteed to work <em>once</em>, the first time it
539     * is invoked for a particular item. This is because, in the event that the
540     * method renames a temporary file, that file will no longer be available
541     * to copy or rename again at a later time.
542     * </p>
543     *
544     * @param file The {@code File} into which the uploaded item should
545     *             be stored.
546     * @throws Exception if an error occurs.
547     */
548    @Override
549    public void write(final File file) throws Exception {
550        if (isInMemory()) {
551            try (FileOutputStream fout = new FileOutputStream(file);) {
552                fout.write(get());
553            } catch (final IOException ignore) {
554                // ignore
555            }
556        } else {
557            final File outputFile = getStoreLocation();
558            if (outputFile == null) {
559                /*
560                 * For whatever reason we cannot write the file to disk.
561                 */
562                throw new FileUploadException("Cannot write uploaded file to disk!");
563            }
564            // Save the length of the file
565            size = outputFile.length();
566            /*
567             * The uploaded file is being stored on disk in a temporary location so move it to the desired file.
568             */
569            if (file.exists() && !file.delete()) {
570                throw new FileUploadException("Cannot write uploaded file to disk!");
571            }
572            FileUtils.moveFile(outputFile, file);
573        }
574    }
575}