org.apache.solr.common.util.ContentStreamBase.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.solr.common.util.ContentStreamBase.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.common.util;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
import java.util.function.Predicate;
import java.util.zip.GZIPInputStream;

import org.apache.http.entity.ContentType;
import org.apache.solr.client.solrj.SolrRequest;
import org.apache.solr.client.solrj.request.RequestWriter;

/**
 * Concrete implementations of ContentStream - one each for File/URL/String/byte array
 * 
 *
 * @since solr 1.2
 */
public abstract class ContentStreamBase implements ContentStream {

    /** Charset name used by {@link #getReader()} when the content type does not declare one. */
    public static final String DEFAULT_CHARSET = StandardCharsets.UTF_8.name();
    /** MIME type reported when the stream name ends with one of the {@link #CSV_SUF} suffixes. */
    private static final String TEXT_CSV = "text/csv";
    /**
     * Content types that say nothing useful about the payload; for <code>file:</code> URLs
     * {@link URLStream#getContentType()} tries to detect a better type when it sees one of these.
     */
    private static final List<String> UNHELPFUL_TYPES = Arrays
            .asList(ContentType.APPLICATION_OCTET_STREAM.getMimeType(), "application/gzip", "content/unknown");
    /** Lower-case name suffixes treated as XML by {@link #attemptToDetermineContentType()}. */
    private static final List<String> XML_SUF = Arrays.asList(".xml", ".xml.gz", ".xml.gzip");
    /** Lower-case name suffixes treated as JSON by {@link #attemptToDetermineContentType()}. */
    private static final List<String> JSON_SUF = Arrays.asList(".json", ".json.gz", ".json.gzip");
    /** Lower-case name suffixes treated as CSV by {@link #attemptToDetermineContentType()}. */
    private static final List<String> CSV_SUF = Arrays.asList(".csv", ".csv.gz", ".csv.gzip");

    /** Stream name; its suffix drives content-type detection. May be null. */
    protected String name;
    /** Free-form description of where the content came from. */
    protected String sourceInfo;
    /** Content type of the stream; may be null when unknown. */
    protected String contentType;
    /** Size of the content as set by the concrete subclass; may be null if never set. */
    protected Long size;

    //---------------------------------------------------------------------
    //---------------------------------------------------------------------

    /**
     * Extracts the value of the <code>charset</code> parameter from a content-type string.
     * <p>
     * The parameter name is matched case-insensitively. Only the charset value itself is
     * returned: any parameters that follow it (separated by <code>;</code>) are dropped and
     * a surrounding pair of double quotes is removed.
     *
     * @param contentType a content-type value such as <code>text/xml; charset=UTF-8</code>;
     *                    may be null
     * @return the declared charset name, or null if <code>contentType</code> is null, has no
     *         <code>charset=</code> parameter after its first character, or declares an
     *         empty value
     */
    public static String getCharsetFromContentType(String contentType) {
        if (contentType == null) {
            return null;
        }
        final String key = "charset=";
        int idx = contentType.toLowerCase(Locale.ROOT).indexOf(key);
        if (idx <= 0) {
            return null;
        }
        String charset = contentType.substring(idx + key.length());
        // Stop at the next parameter so "charset=UTF-8; boundary=x" yields "UTF-8".
        int end = charset.indexOf(';');
        if (end >= 0) {
            charset = charset.substring(0, end);
        }
        charset = charset.trim();
        // Parameter values may be written as quoted strings: charset="utf-8".
        int len = charset.length();
        if (len >= 2 && charset.charAt(0) == '"' && charset.charAt(len - 1) == '"') {
            charset = charset.substring(1, len - 1).trim();
        }
        // An empty value is not a usable charset name; report it as "not declared".
        return charset.isEmpty() ? null : charset;
    }

    /**
     * Guesses a MIME type for this stream from its {@link #name}.
     * <p>
     * The lower-cased name is checked against the known XML, JSON and CSV suffixes (in that
     * order); if none match, the first character of the stream content is inspected instead.
     *
     * @return the guessed MIME type, or null if the name is null or nothing could be determined
     */
    protected String attemptToDetermineContentType() {
        if (name == null) {
            return null;
        }
        final String lowerName = name.toLowerCase(Locale.ROOT);
        final Predicate<String> isSuffixOfName = lowerName::endsWith;

        if (XML_SUF.stream().anyMatch(isSuffixOfName)) {
            return ContentType.APPLICATION_XML.getMimeType();
        }
        if (JSON_SUF.stream().anyMatch(isSuffixOfName)) {
            return ContentType.APPLICATION_JSON.getMimeType();
        }
        if (CSV_SUF.stream().anyMatch(isSuffixOfName)) {
            return TEXT_CSV;
        }
        return attemptToDetermineTypeFromFirstCharacter();
    }

    /**
     * Last-ditch content sniffing: opens the stream, skips leading whitespace and looks at
     * the first remaining byte. A <code>'&lt;'</code> is taken to mean XML and a
     * <code>'{'</code> to mean JSON.
     * <p>
     * This is best effort only: any failure while opening or reading the stream is
     * deliberately ignored and reported as "unknown".
     *
     * @return the XML or JSON MIME type, or null if the content is empty, starts with
     *         anything else, or could not be read
     */
    private String attemptToDetermineTypeFromFirstCharacter() {
        try (InputStream stream = getStream()) {
            int data = stream.read();
            // Skip all leading whitespace (spaces, tabs, line breaks), not just ' '.
            while (data != -1 && Character.isWhitespace(data)) {
                data = stream.read();
            }
            if (data == '<') {
                return ContentType.APPLICATION_XML.getMimeType();
            }
            if (data == '{') {
                return ContentType.APPLICATION_JSON.getMimeType();
            }
        } catch (Exception ignored) {
            // Intentionally swallowed: detection is optional and must never fail the caller.
        }
        return null;
    }

    //------------------------------------------------------------------------
    //------------------------------------------------------------------------

    /**
     * Construct a <code>ContentStream</code> from a <code>URL</code>
     * <p>
     * This uses a <code>URLConnection</code> to get the content stream. The
     * <code>contentType</code>, <code>name</code> and <code>size</code> attributes are
     * (re)populated from the connection every time {@link #getStream()} is called.
     *
     * @see  URLConnection
     */
    public static class URLStream extends ContentStreamBase {
        private final URL url;

        /**
         * @param url the location to read from; a new connection is opened on every
         *            call to {@link #getStream()}
         */
        public URLStream(URL url) {
            this.url = url;
            sourceInfo = "url";
        }

        /**
         * Returns the content type of this stream.
         * <p>
         * For <code>file:</code> URLs whose current type is one of the uninformative ones
         * (e.g. octet-stream), the type is detected from the name/payload instead and cached.
         */
        @Override
        public String getContentType() {
            if ("file".equals(url.getProtocol()) && UNHELPFUL_TYPES.contains(contentType)) {
                String detected = attemptToDetermineContentType();
                if (detected != null) {
                    contentType = detected;
                }
            }
            return contentType;
        }

        /**
         * Opens a new connection to the URL and returns its content, transparently
         * un-gzipping it when the connection reports <code>gzip</code> content encoding or
         * the URL file part ends in <code>.gz</code> / <code>.gzip</code>.
         *
         * @throws IOException if the connection cannot be opened or the gzip header is invalid;
         *                     in the latter case the underlying stream is closed first
         */
        @Override
        public InputStream getStream() throws IOException {
            URLConnection conn = this.url.openConnection();

            contentType = conn.getContentType();
            name = url.toExternalForm();
            size = conn.getContentLengthLong();
            InputStream raw = conn.getInputStream();
            String urlFile = url.getFile().toLowerCase(Locale.ROOT);
            boolean gzipped = "gzip".equals(conn.getContentEncoding()) || urlFile.endsWith(".gz")
                    || urlFile.endsWith(".gzip");
            if (!gzipped) {
                return raw;
            }
            try {
                return new GZIPInputStream(raw);
            } catch (IOException | RuntimeException e) {
                // The GZIP constructor reads the header eagerly; on failure nobody else
                // holds a reference to the raw stream, so release it here.
                try {
                    raw.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
        }
    }

    /**
     * Construct a <code>ContentStream</code> from a <code>File</code>
     */
    public static class FileStream extends ContentStreamBase {
        private final File file;

        /**
         * @param f the file to read; its name, length and URI initialise the
         *          <code>name</code>, <code>size</code> and <code>sourceInfo</code> attributes
         */
        public FileStream(File f) {
            file = f;

            contentType = null; // left unset so getContentType() detects it lazily
            name = file.getName();
            size = file.length();
            sourceInfo = file.toURI().toString();
        }

        /**
         * Returns the content type, detecting (and caching) it from the file name or
         * payload on first use when none has been set.
         */
        @Override
        public String getContentType() {
            if (contentType == null) {
                contentType = attemptToDetermineContentType();
            }
            return contentType;
        }

        /**
         * Opens the file, transparently un-gzipping it when the stream name ends in
         * <code>.gz</code> or <code>.gzip</code>.
         *
         * @throws IOException if the file cannot be opened or is not valid gzip data; in the
         *                     latter case the file handle is closed before the exception propagates
         */
        @Override
        public InputStream getStream() throws IOException {
            InputStream raw = new FileInputStream(file);
            String lowerName = name.toLowerCase(Locale.ROOT);
            if (!lowerName.endsWith(".gz") && !lowerName.endsWith(".gzip")) {
                return raw;
            }
            try {
                return new GZIPInputStream(raw);
            } catch (IOException | RuntimeException e) {
                // The GZIP constructor reads the header eagerly; without this the open
                // file handle would leak whenever a ".gz" file is not really gzip.
                try {
                    raw.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
        }
    }

    /**
     * Construct a <code>ContentStream</code> from a <code>String</code>
     */
    public static class StringStream extends ContentStreamBase {
        private final String str;

        /**
         * Creates a stream whose content type is guessed from the text via {@link #detect(String)}.
         *
         * @param str the content; must not be null
         */
        public StringStream(String str) {
            this(str, detect(str));
        }

        /**
         * @param str         the content; must not be null
         * @param contentType the content type to report, or null if unknown
         */
        public StringStream(String str, String contentType) {
            this.str = str;
            this.contentType = contentType;
            name = null;
            // Size is the length of the UTF-8 encoding, matching what getStream() serves.
            size = (long) str.getBytes(StandardCharsets.UTF_8).length;
            sourceInfo = "string";
        }

        /**
         * Guesses a content type from the first non-whitespace character(s) of the text.
         * <ul>
         *   <li><code>#</code>, <code>//</code>, <code>/*</code>, <code>{</code> or
         *       <code>[</code> yields <code>application/json</code></li>
         *   <li><code>&lt;</code> yields <code>text/xml</code></li>
         * </ul>
         * Every character is considered, including the last one, so one-character inputs
         * such as <code>"{"</code> are detected too.
         *
         * @param str the text to inspect; must not be null
         * @return the detected content type, or null if the text is blank or starts with
         *         anything else
         */
        public static String detect(String str) {
            final int len = str.length();
            for (int i = 0; i < len; i++) {
                char ch = str.charAt(i);
                if (Character.isWhitespace(ch)) {
                    continue;
                }
                // Look-ahead is only needed for comment openers; guard the end of the string.
                char next = (i + 1 < len) ? str.charAt(i + 1) : '\0';
                if (ch == '#' // single line comment
                        || (ch == '/' && (next == '/' || next == '*')) // single line or multi-line comment
                        || (ch == '{' || ch == '[') // start of JSON object or array
                ) {
                    return "application/json";
                }
                if (ch == '<') {
                    return "text/xml";
                }
                // Only the first non-whitespace character decides.
                return null;
            }
            return null;
        }

        /** Returns the content encoded as UTF-8 bytes. */
        @Override
        public InputStream getStream() throws IOException {
            return new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8));
        }

        /**
         * If a charset is defined (by the contentType) decode the byte stream with it,
         * otherwise read the string directly through a StringReader.
         * <p>
         * NOTE(review): {@link #getStream()} always encodes as UTF-8, so a declared
         * charset other than UTF-8 will not round-trip non-ASCII text here - confirm
         * whether callers rely on this before changing it.
         */
        @Override
        public Reader getReader() throws IOException {
            String charset = getCharsetFromContentType(contentType);
            return charset == null ? new StringReader(str) : new InputStreamReader(getStream(), charset);
        }
    }

    /**
     * Base reader implementation.  If the contentType declares a
     * charset use it, otherwise use "utf-8".
     * <p>
     * The underlying stream is closed again if the reader cannot be created
     * (for example because the declared charset is not supported).
     *
     * @return a reader decoding {@link #getStream()} with the resolved charset
     * @throws IOException if the stream cannot be opened or the charset is unsupported
     */
    @Override
    public Reader getReader() throws IOException {
        String declared = getCharsetFromContentType(getContentType());
        String charset = (declared == null) ? DEFAULT_CHARSET : declared;
        InputStream stream = getStream();
        try {
            return new InputStreamReader(stream, charset);
        } catch (IOException | RuntimeException e) {
            // The stream was opened before the charset name was validated; do not leak it.
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
            }
            throw e;
        }
    }

    //------------------------------------------------------------------
    // Getters / Setters for overrideable attributes
    //------------------------------------------------------------------

    /** @return the content type currently stored for this stream; may be null */
    @Override
    public String getContentType() {
        return this.contentType;
    }

    /** @param contentType the content type to report from now on */
    public void setContentType(String contentType) {
        this.contentType = contentType;
    }

    /** @return the stream name; may be null */
    @Override
    public String getName() {
        return this.name;
    }

    /** @param name the stream name to report from now on */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the stored size of the content; may be null */
    @Override
    public Long getSize() {
        return this.size;
    }

    /** @param size the size to report from now on */
    public void setSize(Long size) {
        this.size = size;
    }

    /** @return the stored description of where this content came from; may be null */
    @Override
    public String getSourceInfo() {
        return this.sourceInfo;
    }

    /** @param sourceInfo the source description to report from now on */
    public void setSourceInfo(String sourceInfo) {
        this.sourceInfo = sourceInfo;
    }

    /**
     * Serialises a request into an in-memory <code>ContentStream</code>.
     * <p>
     * The content writer obtained from <code>requestWriter</code> for <code>req</code> writes
     * into a byte buffer; the buffered bytes are wrapped in a {@link ByteArrayStream} with a
     * null source and the writer's content type.
     *
     * @param requestWriter supplies the content writer for the request
     * @param req           the request to serialise
     * @return a stream over the serialised bytes
     * @throws IOException if writing the content fails
     */
    public static ContentStream create(RequestWriter requestWriter, SolrRequest req) throws IOException {
        RequestWriter.ContentWriter writer = requestWriter.getContentWriter(req);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        writer.write(buffer);
        byte[] payload = buffer.toByteArray();
        return new ByteArrayStream(payload, null, writer.getContentType());
    }

    /**
     * Construct a <code>ContentStream</code> from a <code>byte[]</code>
     */
    public static class ByteArrayStream extends ContentStreamBase {
        private final byte[] bytes;

        /**
         * Creates a stream with no content type.
         *
         * @param bytes  the content; must not be null
         * @param source used as both the name and the source info of the stream
         */
        public ByteArrayStream(byte[] bytes, String source) {
            this(bytes, source, null);
        }

        /**
         * @param bytes       the content; must not be null (the array is used as-is, not copied)
         * @param source      used as both the name and the source info of the stream
         * @param contentType the content type to report, or null if unknown
         */
        public ByteArrayStream(byte[] bytes, String source, String contentType) {
            this.bytes = bytes;

            this.contentType = contentType;
            this.name = source;
            this.size = Long.valueOf(bytes.length);
            this.sourceInfo = source;
        }

        /** Returns a fresh in-memory stream over the wrapped bytes. */
        @Override
        public InputStream getStream() throws IOException {
            final byte[] content = this.bytes;
            return new ByteArrayInputStream(content);
        }
    }
}